SolrCloud 分布式搜索:使用 Java(SolrJ)对文档内容建立索引
2014-09-11 14:55
429 查看
addDoc:
package solr.addDocument;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.ResourceBundle;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
/**
 * Indexes rich documents (Word, Excel, PowerPoint, PDF, plain text) into a
 * SolrCloud collection through the {@code /update/extract} request handler.
 *
 * <p>Original author's note: use SQL queries to look up the files of each
 * category (EIA approvals, reports, ...), keep their paths in a list, and call
 * the different cores to build each index; for searching, combine this class
 * with SolrJSearcheDemo.
 *
 * @author Administrator
 */
public class TestCreate {
    private static final Logger log = Logger.getLogger(TestCreate.class);

    /** Address handed to the {@code CloudSolrServer} constructor. */
    private static final String ZK_HOST = "192.168.4.77:2181";

    /** Collection that receives the update requests. */
    private static final String DEFAULT_COLLECTION = "collection1";

    /** Marker returned by {@link #getFileContentType(String)} for unknown extensions. */
    private static final String UNSUPPORTED_TYPE = "othertype";

    /**
     * Timestamp layouts accepted by {@link #dataTurntoLong(String)}, tried in
     * order: the dashed form that {@link #main(String[])} passes, then the
     * compact form that was the only one parsed previously.
     */
    private static final String[] SAVE_TIME_PATTERNS = {
        "yyyy-MM-dd HH:mm:ss",
        "yyyyMMddHHmmss",
    };

    /**
     * Demo entry point: indexes one hard-coded Word document.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        String path = "C:/Users/gnet/Desktop/SRCA根证书安装说明手册.doc";
        String name = "SRCA根证书安装说明手册.doc";
        try {
            // The earlier call passed "" as homename and "filecore" as mytitle,
            // which stored the core name as the document title. The title is
            // now the file name and "filecore" goes into the homename slot.
            indexFilesSolr(path, path, name, "filecore", name,
                    "2014-09-11 15:19:06", "B", "2014");
        } catch (Exception e) {
            log.error("Failed to index " + path, e);
        }
    }

    /**
     * Uploads one file to Solr for content extraction and commits it.
     *
     * <p>Files whose extension is not recognised by
     * {@link #getFileContentType(String)} and files that do not exist are
     * skipped with a warning; no request is sent for them.
     *
     * @param id          value sent as {@code literal.id}
     * @param fileurl     path of the file to upload
     * @param filename    file name; only its extension is used, to pick the content type
     * @param homename    not used by this method (NOTE(review): it appears to have
     *                    selected a core back when an HttpSolrServer URL was built
     *                    — confirm before removing)
     * @param mytitle     value sent as {@code literal.mytitle}
     * @param savetime    timestamp text, converted with {@link #dataTurntoLong(String)}
     *                    and sent as {@code literal.mytime}
     * @param myindextype value sent as {@code literal.myindextype}
     * @param myyears     value sent as {@code literal.myyears}
     * @throws IllegalArgumentException if {@code savetime} cannot be parsed
     * @throws Exception                if reading the file or the Solr request fails
     */
    public static void indexFilesSolr(String id, String fileurl,
            String filename, String homename, String mytitle, String savetime,
            String myindextype, String myyears) throws Exception {
        String contenttype = getFileContentType(filename);
        if (UNSUPPORTED_TYPE.equals(contenttype)) {
            log.warn("Skipping file with unsupported type: " + filename);
            return;
        }
        File file = new File(fileurl);
        if (!file.exists()) {
            log.warn("Skipping missing file: " + fileurl);
            return;
        }

        // Build the whole request first so a bad timestamp fails before any
        // connection to the cluster is opened.
        ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
        up.addFile(file, contenttype);
        up.setParam("literal.id", id);
        up.setParam("literal.mytitle", mytitle);
        up.setParam("literal.mytime", dataTurntoLong(savetime));
        up.setParam("literal.myindextype", myindextype);
        up.setParam("literal.myyears", myyears);
        up.setParam("fmap.content", "content");
        up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

        CloudSolrServer server = new CloudSolrServer(ZK_HOST);
        try {
            server.setDefaultCollection(DEFAULT_COLLECTION);
            log.info("开始建索引:" + fileurl);
            server.request(up);
            log.info("结束建索引:" + fileurl);
        } finally {
            // Release the client even when the request throws.
            server.shutdown();
        }
    }

    /**
     * Converts a timestamp string to epoch milliseconds, rendered as text.
     *
     * <p>Accepts {@code yyyy-MM-dd HH:mm:ss} and {@code yyyyMMddHHmmss}. Parsing
     * is strict (non-lenient) and uses the JVM default time zone. Previously
     * only the compact pattern was tried, and a {@code ParseException} left the
     * date {@code null} so the method ended in a {@code NullPointerException}.
     *
     * @param date timestamp text
     * @return milliseconds since the epoch, as a decimal string
     * @throws IllegalArgumentException if {@code date} is null or matches neither pattern
     */
    public static String dataTurntoLong(String date) {
        if (date == null) {
            throw new IllegalArgumentException("date must not be null");
        }
        ParseException lastFailure = null;
        for (String pattern : SAVE_TIME_PATTERNS) {
            // SimpleDateFormat is not thread-safe, so a fresh one is made per call.
            SimpleDateFormat format = new SimpleDateFormat(pattern);
            format.setLenient(false);
            try {
                Date parsed = format.parse(date);
                return String.valueOf(parsed.getTime());
            } catch (ParseException e) {
                lastFailure = e;
            }
        }
        throw new IllegalArgumentException(
                "Timestamp is neither yyyy-MM-dd HH:mm:ss nor yyyyMMddHHmmss: " + date,
                lastFailure);
    }

    /**
     * Reads the {@code serverurl} entry from the {@code solrserver} resource bundle.
     *
     * @return the configured server URL
     */
    public static String getServerurl() {
        return ResourceBundle.getBundle("solrserver").getString("serverurl");
    }

    /**
     * Maps a file name to a MIME type based on its extension.
     *
     * <p>The comparison ignores case, so {@code REPORT.PDF} is treated like
     * {@code report.pdf}.
     *
     * @param filename file name or path; may be null
     * @return the MIME type, or {@code "othertype"} when the name is null or the
     *         extension is not one of xlsx, pdf, doc, txt, xls, docx, ppt, pptx
     */
    public static String getFileContentType(String filename) {
        if (filename == null) {
            return UNSUPPORTED_TYPE;
        }
        // Without a dot lastIndexOf yields -1 and the whole name is compared,
        // which ends in the default branch.
        String extension = filename.substring(filename.lastIndexOf('.') + 1)
                .toLowerCase(java.util.Locale.ROOT);
        switch (extension) {
            case "xlsx":
                return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            case "pdf":
                return "application/pdf";
            case "doc":
                return "application/msword";
            case "txt":
                return "text/plain";
            case "xls":
                return "application/vnd.ms-excel";
            case "docx":
                return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            case "ppt":
                return "application/vnd.ms-powerpoint";
            case "pptx":
                return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
            default:
                return UNSUPPORTED_TYPE;
        }
    }

    /**
     * Asks the platform for the content type of a file via
     * {@link Files#probeContentType(Path)} and logs the result.
     *
     * @param paths path of the file to probe
     * @return the probed content type, or {@code null} when it cannot be
     *         determined or an I/O error occurs
     */
    public static String getContentType(String paths) {
        Path path = Paths.get(paths);
        String contentType = null;
        try {
            contentType = Files.probeContentType(path);
        } catch (IOException e) {
            log.warn("Could not probe content type of " + paths, e);
        }
        log.info("文件类型 : " + contentType);
        return contentType;
    }
}
addindex:
package solr.addindex;
import java.io.IOException;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.common.SolrInputDocument;
/**
 * Demo that fills a SolrCloud collection with 1000 synthetic "book" documents.
 */
public class SolrCloudSolrjPopulator {
    /** Number of documents to add. */
    private static final int DOCUMENT_COUNT = 1000;

    /** A progress line is printed and a commit issued every this many documents. */
    private static final int COMMIT_INTERVAL = 100;

    /**
     * Adds the documents one by one, committing periodically and once more at
     * the end.
     *
     * @param args ignored
     * @throws IOException         if communication with the cluster fails
     * @throws SolrServerException if Solr rejects an add or commit
     */
    public static void main(String[] args) throws IOException, SolrServerException {
        String zkHost = "192.168.233.128:2181";
        String defaultCollection = "collection1";
        CloudSolrServer server = new CloudSolrServer(zkHost);
        try {
            server.setDefaultCollection(defaultCollection);
            for (int i = 0; i < DOCUMENT_COUNT; ++i) {
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("cat", "book");
                doc.addField("id", "book-" + i);
                doc.addField("name", "The Legend of Po part " + i);
                server.add(doc);
                // Braces are required here: without them only the println was
                // conditional and commit() ran after every single document.
                if (i % COMMIT_INTERVAL == 0) {
                    System.out.println(i);
                    server.commit(); // periodically flush
                }
            }
            server.commit();
        } finally {
            // Release the client even when an add or commit throws.
            server.shutdown();
        }
    }
}
search:
import java.net.MalformedURLException;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
/**
 * Demo that runs a single edismax query against a SolrCloud collection and
 * prints every returned document.
 */
public class SolrCloudSolrJSearcher {
    /**
     * Queries {@code cat:electronics} starting at offset 0 and prints the
     * documents of the response to standard output.
     *
     * @param args ignored
     * @throws MalformedURLException kept in the signature for compatibility with
     *                               existing callers
     * @throws SolrServerException   if the query fails
     */
    public static void main(String[] args) throws MalformedURLException,
            SolrServerException {
        String zkHost = "localhost:2181";
        String defaultCollection = "collection1";
        CloudSolrServer solr = new CloudSolrServer(zkHost);
        try {
            solr.setDefaultCollection(defaultCollection);

            ModifiableSolrParams params = new ModifiableSolrParams();
            params.set("q", "cat:electronics");
            params.set("defType", "edismax");
            params.set("start", "0");

            QueryResponse response = solr.query(params);
            SolrDocumentList results = response.getResults();
            for (int i = 0; i < results.size(); ++i) {
                System.out.println(results.get(i));
            }
        } finally {
            // Release the client even when the query throws.
            solr.shutdown();
        }
    }
}
package solr.addDocument;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.ResourceBundle;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
/**
 * Indexes rich documents (Word, Excel, PowerPoint, PDF, plain text) into a
 * SolrCloud collection through the {@code /update/extract} request handler.
 *
 * <p>Original author's note: use SQL queries to look up the files of each
 * category (EIA approvals, reports, ...), keep their paths in a list, and call
 * the different cores to build each index; for searching, combine this class
 * with SolrJSearcheDemo.
 *
 * @author Administrator
 */
public class TestCreate {
    private static final Logger log = Logger.getLogger(TestCreate.class);

    /** Address handed to the {@code CloudSolrServer} constructor. */
    private static final String ZK_HOST = "192.168.4.77:2181";

    /** Collection that receives the update requests. */
    private static final String DEFAULT_COLLECTION = "collection1";

    /** Marker returned by {@link #getFileContentType(String)} for unknown extensions. */
    private static final String UNSUPPORTED_TYPE = "othertype";

    /**
     * Timestamp layouts accepted by {@link #dataTurntoLong(String)}, tried in
     * order: the dashed form that {@link #main(String[])} passes, then the
     * compact form that was the only one parsed previously.
     */
    private static final String[] SAVE_TIME_PATTERNS = {
        "yyyy-MM-dd HH:mm:ss",
        "yyyyMMddHHmmss",
    };

    /**
     * Demo entry point: indexes one hard-coded Word document.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        String path = "C:/Users/gnet/Desktop/SRCA根证书安装说明手册.doc";
        String name = "SRCA根证书安装说明手册.doc";
        try {
            // The earlier call passed "" as homename and "filecore" as mytitle,
            // which stored the core name as the document title. The title is
            // now the file name and "filecore" goes into the homename slot.
            indexFilesSolr(path, path, name, "filecore", name,
                    "2014-09-11 15:19:06", "B", "2014");
        } catch (Exception e) {
            log.error("Failed to index " + path, e);
        }
    }

    /**
     * Uploads one file to Solr for content extraction and commits it.
     *
     * <p>Files whose extension is not recognised by
     * {@link #getFileContentType(String)} and files that do not exist are
     * skipped with a warning; no request is sent for them.
     *
     * @param id          value sent as {@code literal.id}
     * @param fileurl     path of the file to upload
     * @param filename    file name; only its extension is used, to pick the content type
     * @param homename    not used by this method (NOTE(review): it appears to have
     *                    selected a core back when an HttpSolrServer URL was built
     *                    — confirm before removing)
     * @param mytitle     value sent as {@code literal.mytitle}
     * @param savetime    timestamp text, converted with {@link #dataTurntoLong(String)}
     *                    and sent as {@code literal.mytime}
     * @param myindextype value sent as {@code literal.myindextype}
     * @param myyears     value sent as {@code literal.myyears}
     * @throws IllegalArgumentException if {@code savetime} cannot be parsed
     * @throws Exception                if reading the file or the Solr request fails
     */
    public static void indexFilesSolr(String id, String fileurl,
            String filename, String homename, String mytitle, String savetime,
            String myindextype, String myyears) throws Exception {
        String contenttype = getFileContentType(filename);
        if (UNSUPPORTED_TYPE.equals(contenttype)) {
            log.warn("Skipping file with unsupported type: " + filename);
            return;
        }
        File file = new File(fileurl);
        if (!file.exists()) {
            log.warn("Skipping missing file: " + fileurl);
            return;
        }

        // Build the whole request first so a bad timestamp fails before any
        // connection to the cluster is opened.
        ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
        up.addFile(file, contenttype);
        up.setParam("literal.id", id);
        up.setParam("literal.mytitle", mytitle);
        up.setParam("literal.mytime", dataTurntoLong(savetime));
        up.setParam("literal.myindextype", myindextype);
        up.setParam("literal.myyears", myyears);
        up.setParam("fmap.content", "content");
        up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

        CloudSolrServer server = new CloudSolrServer(ZK_HOST);
        try {
            server.setDefaultCollection(DEFAULT_COLLECTION);
            log.info("开始建索引:" + fileurl);
            server.request(up);
            log.info("结束建索引:" + fileurl);
        } finally {
            // Release the client even when the request throws.
            server.shutdown();
        }
    }

    /**
     * Converts a timestamp string to epoch milliseconds, rendered as text.
     *
     * <p>Accepts {@code yyyy-MM-dd HH:mm:ss} and {@code yyyyMMddHHmmss}. Parsing
     * is strict (non-lenient) and uses the JVM default time zone. Previously
     * only the compact pattern was tried, and a {@code ParseException} left the
     * date {@code null} so the method ended in a {@code NullPointerException}.
     *
     * @param date timestamp text
     * @return milliseconds since the epoch, as a decimal string
     * @throws IllegalArgumentException if {@code date} is null or matches neither pattern
     */
    public static String dataTurntoLong(String date) {
        if (date == null) {
            throw new IllegalArgumentException("date must not be null");
        }
        ParseException lastFailure = null;
        for (String pattern : SAVE_TIME_PATTERNS) {
            // SimpleDateFormat is not thread-safe, so a fresh one is made per call.
            SimpleDateFormat format = new SimpleDateFormat(pattern);
            format.setLenient(false);
            try {
                Date parsed = format.parse(date);
                return String.valueOf(parsed.getTime());
            } catch (ParseException e) {
                lastFailure = e;
            }
        }
        throw new IllegalArgumentException(
                "Timestamp is neither yyyy-MM-dd HH:mm:ss nor yyyyMMddHHmmss: " + date,
                lastFailure);
    }

    /**
     * Reads the {@code serverurl} entry from the {@code solrserver} resource bundle.
     *
     * @return the configured server URL
     */
    public static String getServerurl() {
        return ResourceBundle.getBundle("solrserver").getString("serverurl");
    }

    /**
     * Maps a file name to a MIME type based on its extension.
     *
     * <p>The comparison ignores case, so {@code REPORT.PDF} is treated like
     * {@code report.pdf}.
     *
     * @param filename file name or path; may be null
     * @return the MIME type, or {@code "othertype"} when the name is null or the
     *         extension is not one of xlsx, pdf, doc, txt, xls, docx, ppt, pptx
     */
    public static String getFileContentType(String filename) {
        if (filename == null) {
            return UNSUPPORTED_TYPE;
        }
        // Without a dot lastIndexOf yields -1 and the whole name is compared,
        // which ends in the default branch.
        String extension = filename.substring(filename.lastIndexOf('.') + 1)
                .toLowerCase(java.util.Locale.ROOT);
        switch (extension) {
            case "xlsx":
                return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            case "pdf":
                return "application/pdf";
            case "doc":
                return "application/msword";
            case "txt":
                return "text/plain";
            case "xls":
                return "application/vnd.ms-excel";
            case "docx":
                return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            case "ppt":
                return "application/vnd.ms-powerpoint";
            case "pptx":
                return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
            default:
                return UNSUPPORTED_TYPE;
        }
    }

    /**
     * Asks the platform for the content type of a file via
     * {@link Files#probeContentType(Path)} and logs the result.
     *
     * @param paths path of the file to probe
     * @return the probed content type, or {@code null} when it cannot be
     *         determined or an I/O error occurs
     */
    public static String getContentType(String paths) {
        Path path = Paths.get(paths);
        String contentType = null;
        try {
            contentType = Files.probeContentType(path);
        } catch (IOException e) {
            log.warn("Could not probe content type of " + paths, e);
        }
        log.info("文件类型 : " + contentType);
        return contentType;
    }
}
addindex:
package solr.addindex;
import java.io.IOException;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.common.SolrInputDocument;
/**
 * Demo that fills a SolrCloud collection with 1000 synthetic "book" documents.
 */
public class SolrCloudSolrjPopulator {
    /** Number of documents to add. */
    private static final int DOCUMENT_COUNT = 1000;

    /** A progress line is printed and a commit issued every this many documents. */
    private static final int COMMIT_INTERVAL = 100;

    /**
     * Adds the documents one by one, committing periodically and once more at
     * the end.
     *
     * @param args ignored
     * @throws IOException         if communication with the cluster fails
     * @throws SolrServerException if Solr rejects an add or commit
     */
    public static void main(String[] args) throws IOException, SolrServerException {
        String zkHost = "192.168.233.128:2181";
        String defaultCollection = "collection1";
        CloudSolrServer server = new CloudSolrServer(zkHost);
        try {
            server.setDefaultCollection(defaultCollection);
            for (int i = 0; i < DOCUMENT_COUNT; ++i) {
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("cat", "book");
                doc.addField("id", "book-" + i);
                doc.addField("name", "The Legend of Po part " + i);
                server.add(doc);
                // Braces are required here: without them only the println was
                // conditional and commit() ran after every single document.
                if (i % COMMIT_INTERVAL == 0) {
                    System.out.println(i);
                    server.commit(); // periodically flush
                }
            }
            server.commit();
        } finally {
            // Release the client even when an add or commit throws.
            server.shutdown();
        }
    }
}
search:
import java.net.MalformedURLException;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
/**
 * Demo that runs a single edismax query against a SolrCloud collection and
 * prints every returned document.
 */
public class SolrCloudSolrJSearcher {
    /**
     * Queries {@code cat:electronics} starting at offset 0 and prints the
     * documents of the response to standard output.
     *
     * @param args ignored
     * @throws MalformedURLException kept in the signature for compatibility with
     *                               existing callers
     * @throws SolrServerException   if the query fails
     */
    public static void main(String[] args) throws MalformedURLException,
            SolrServerException {
        String zkHost = "localhost:2181";
        String defaultCollection = "collection1";
        CloudSolrServer solr = new CloudSolrServer(zkHost);
        try {
            solr.setDefaultCollection(defaultCollection);

            ModifiableSolrParams params = new ModifiableSolrParams();
            params.set("q", "cat:electronics");
            params.set("defType", "edismax");
            params.set("start", "0");

            QueryResponse response = solr.query(params);
            SolrDocumentList results = response.getResults();
            for (int i = 0; i < results.size(); ++i) {
                System.out.println(results.get(i));
            }
        } finally {
            // Release the client even when the query throws.
            solr.shutdown();
        }
    }
}
相关文章推荐
- java之全文索引搜索lucene之增删改查文档
- java之全文索引搜索lucene之增删改查文档与中文分词搜索
- java 技术文档 用java获得word,excel,pdf等文档的内容
- 用JAVA创建XML文档,为java API文档添加搜索功能
- Java程序员从笨鸟到菜鸟之(一百零三)java操作office和pdf文件(一)java读取word,excel和pdf文档内容
- Java使用PDFBox开发包实现对PDF文档内容编辑与保存
- Java使用PDFBox开发包实现对PDF文档内容编辑与保存
- 用java获得word,excel,pdf等文档的内容
- [java] 获取pdf/word文档文本内容
- JAVA操作XML(3)--读取XML文档的内容,并将内容显示在浏览器上
- 在shell帮助文档中搜索内容
- lucene(全文搜索)_根据内容建立索引_源码下载
- 用java获得word,excel,pdf文档的内容
- 搜索文本内容——Java代码的简单实现(修改版)
- org.apache.lucene.search.Hits.java搜索索引
- lotus后台java代理中对文档的搜索
- 为Flash建搜索内容索引
- 用递归的方式实现文件内容搜索(java)
- 基于tm-extractor的Word文档内容搜索软件开发
- Java程序员从笨鸟到菜鸟之(一百零三)java操作office和pdf文件(一)java读取word,excel和pdf文档内容