您的位置:首页 > 编程语言 > Java开发

SolrCloud 分布式搜索:使用 Java 对文档内容建立索引

2014-09-11 14:55 429 查看
addDoc:

package solr.addDocument;

import java.io.File;

import java.io.IOException;

import java.nio.file.Files;

import java.nio.file.Path;

import java.nio.file.Paths;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.Date;

import java.util.ResourceBundle;

import org.apache.log4j.Logger;

import org.apache.solr.client.solrj.SolrServer;

import org.apache.solr.client.solrj.impl.CloudSolrServer;

import org.apache.solr.client.solrj.impl.HttpSolrServer;

import org.apache.solr.client.solrj.request.AbstractUpdateRequest;

import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;

/**

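* Queries the files of each category (EIA approvals, reports, ...) with SQL, stores their paths in a list, and then builds the index for each category by calling the matching core from one place.

* For searching, use this together with the SolrJSearcheDemo class.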

*

* @author Administrator

*

*/

public class TestCreate {

private static Logger log = Logger.getLogger(TestCreate.class);

/**
 * Demo entry point: indexes one sample Word document from the local desktop
 * by calling {@code indexFilesSolr}.
 *
 * <p>Any exception raised while indexing is printed to standard error and not
 * rethrown.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String args[]) {
    String samplePath = "C:/Users/gnet/Desktop/SRCA根证书安装说明手册.doc";
    String sampleName = "SRCA根证书安装说明手册.doc";
    try {
        // Parameter order: id, fileurl, filename, homename, mytitle, savetime,
        // myindextype, myyears. "filecore" goes in the homename slot and the
        // document name in the mytitle slot, so the indexed title is no longer
        // the literal text "filecore". The timestamp uses the yyyyMMddHHmmss
        // layout that dataTurntoLong parses.
        indexFilesSolr(samplePath, samplePath, sampleName, "filecore", sampleName,
                "20140911151906", "B", "2014");
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Sends one file to the {@code /update/extract} handler of the
 * {@code collection1} collection and commits it in the same request.
 *
 * <p>Nothing is sent when {@code getFileContentType(filename)} returns
 * {@code "othertype"} or when {@code fileurl} does not exist on disk; both
 * cases are logged as warnings. The SolrCloud client is only created once
 * these checks pass and is always shut down before the method returns.
 *
 * @param id          value sent as the {@code literal.id} request parameter
 * @param fileurl     path of the file to upload
 * @param filename    file name, used only to pick the content type
 * @param homename    not used by this method (the commented-out HTTP variant
 *                    appended it to the server URL)
 * @param mytitle     value sent as {@code literal.mytitle}
 * @param savetime    timestamp text; converted with {@code dataTurntoLong}
 *                    and sent as {@code literal.mytime}
 * @param myindextype value sent as {@code literal.myindextype}
 * @param myyears     value sent as {@code literal.myyears}
 * @throws Exception if building or sending the update request fails
 */
public static void indexFilesSolr(String id, String fileurl,
        String filename, String homename, String mytitle, String savetime,
        String myindextype, String myyears) throws Exception {
    String contenttype = getFileContentType(filename);
    if (contenttype.equals("othertype")) {
        log.warn("不支持的文件类型:" + filename);
        return;
    }

    File file = new File(fileurl);
    if (!file.exists()) {
        log.warn("文件不存在:" + fileurl);
        return;
    }

    String zkHost = "192.168.4.77:2181";
    String defaultCollection = "collection1";
    CloudSolrServer server = new CloudSolrServer(zkHost);
    try {
        server.setDefaultCollection(defaultCollection);

        ContentStreamUpdateRequest up = new ContentStreamUpdateRequest(
                "/update/extract");
        log.info("开始建索引:" + fileurl);
        up.addFile(file, contenttype);
        up.setParam("literal.id", id);
        up.setParam("literal.mytitle", mytitle);
        up.setParam("literal.mytime", dataTurntoLong(savetime));
        up.setParam("literal.myindextype", myindextype);
        up.setParam("literal.myyears", myyears);
        up.setParam("fmap.content", "content");
        up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
        server.request(up);
        log.info("结束建索引:" + fileurl);
    } finally {
        // Release the client's connections even when the request fails.
        server.shutdown();
    }
}

/**
 * Converts a timestamp string to epoch milliseconds, returned as decimal text.
 *
 * <p>Two layouts are accepted: {@code yyyy-MM-dd HH:mm:ss} and the compact
 * {@code yyyyMMddHHmmss}. Leading and trailing whitespace is ignored. Parsing
 * is strict (non-lenient) and uses the default settings of
 * {@link SimpleDateFormat}.
 *
 * @param date the timestamp text
 * @return the parsed instant as milliseconds since the epoch, as a string
 * @throws IllegalArgumentException if {@code date} is {@code null} or matches
 *         neither layout
 */
public static String dataTurntoLong(String date) {
    if (date == null) {
        throw new IllegalArgumentException("date must not be null");
    }
    String text = date.trim();
    // The dashed layout is tried first; the compact layout is the one this
    // method originally supported.
    String[] patterns = { "yyyy-MM-dd HH:mm:ss", "yyyyMMddHHmmss" };
    for (String pattern : patterns) {
        SimpleDateFormat format = new SimpleDateFormat(pattern);
        // Strict parsing, so text in the wrong layout is rejected rather than
        // being turned into an unrelated date.
        format.setLenient(false);
        try {
            Date parsed = format.parse(text);
            return String.valueOf(parsed.getTime());
        } catch (ParseException ignored) {
            // This layout does not match; fall through to the next one.
        }
    }
    throw new IllegalArgumentException(
            "Unsupported date format (expected yyyy-MM-dd HH:mm:ss or yyyyMMddHHmmss): " + date);
}

/**
 * Looks up the Solr server URL from configuration.
 *
 * @return the value of the {@code serverurl} key in the {@code solrserver}
 *         resource bundle
 */
public static String getServerurl() {
    return ResourceBundle.getBundle("solrserver").getString("serverurl");
}

/**
 * Maps a file name to the MIME content type used when uploading it.
 *
 * <p>The extension is the text after the last {@code '.'} and is compared
 * case-insensitively. Recognised extensions are {@code xlsx}, {@code pdf},
 * {@code doc}, {@code txt}, {@code xls}, {@code docx}, {@code ppt} and
 * {@code pptx}.
 *
 * @param filename the file name to inspect; may be {@code null}
 * @return the matching MIME type, or {@code "othertype"} when the name is
 *         {@code null}, has no extension, or has an unrecognised extension
 */
public static String getFileContentType(String filename) {
    if (filename == null) {
        return "othertype";
    }
    int dot = filename.lastIndexOf('.');
    if (dot < 0) {
        // Without a dot there is no extension; the bare name is not treated as one.
        return "othertype";
    }
    String extension = filename.substring(dot + 1).toLowerCase(java.util.Locale.ROOT);
    switch (extension) {
        case "xlsx":
            return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
        case "pdf":
            return "application/pdf";
        case "doc":
            return "application/msword";
        case "txt":
            return "text/plain";
        case "xls":
            return "application/vnd.ms-excel";
        case "docx":
            return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        case "ppt":
            return "application/vnd.ms-powerpoint";
        case "pptx":
            return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
        default:
            return "othertype";
    }
}

/**
 * Asks the platform for the content type of a file via
 * {@link Files#probeContentType}, and logs the result.
 *
 * @param paths the path of the file, as text
 * @return the probed content type; {@code null} when probing returns
 *         {@code null} or fails with an {@link IOException} (the failure is
 *         logged, not rethrown)
 */
public static String getContentType(String paths) {
    String contentType = null;
    try {
        contentType = Files.probeContentType(Paths.get(paths));
    } catch (IOException e) {
        // Report through the class logger so the failure reaches the configured log output.
        log.error("获取文件类型失败: " + paths, e);
    }
    log.info("文件类型 : " + contentType);
    return contentType;
}

// public static void main(String args[]) {

// File file = new File("D:\\logs");

// indexFilesSolr(path, path, a, "filecore", a,

// "2014-09-11 15:19:06", type, "200" + i);

// String files[] = file.list();

// for (int i = 0; i < files.length; i++) {

// String a = files[i];

// String path = "D:/logs/" + files[i];

// String type = "A";

// try {

// if (i % 2 == 0) {

// type = "B";

// }

// indexFilesSolr(path, path, a, "filecore", a,

// "2014-09-11 15:19:06", type, "200" + i);

// } catch (Exception e) {

// e.printStackTrace();

// }

// }

// }

}

addindex:

package solr.addindex;

import java.io.IOException;

import org.apache.solr.client.solrj.SolrServerException;

import org.apache.solr.client.solrj.impl.CloudSolrServer;

import org.apache.solr.common.SolrInputDocument;

/**
 * Demo that adds 1000 sample "book" documents to the {@code collection1}
 * collection through a ZooKeeper-aware SolrJ client.
 */
public class SolrCloudSolrjPopulator {

    /**
     * Adds documents {@code book-0} to {@code book-999}, committing after
     * every 100th document and once more at the end.
     *
     * @param args command-line arguments (not used)
     * @throws IOException         if communicating with the server fails
     * @throws SolrServerException if the server reports an error
     */
    public static void main(String[] args) throws IOException, SolrServerException {
        String zkHost = "192.168.233.128:2181";
        String defaultCollection = "collection1";
        CloudSolrServer server = new CloudSolrServer(zkHost);
        try {
            server.setDefaultCollection(defaultCollection);
            for (int i = 0; i < 1000; ++i) {
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("cat", "book");
                doc.addField("id", "book-" + i);
                doc.addField("name", "The Legend of Po part " + i);
                server.add(doc);
                // Braces keep the commit inside the condition, so it runs
                // every 100 documents and not on every iteration.
                if (i % 100 == 0) {
                    System.out.println(i);
                    server.commit(); // periodically flush
                }
            }
            server.commit();
        } finally {
            // Release the client's connections even when indexing fails.
            server.shutdown();
        }
    }
}

search:

import java.net.MalformedURLException;

import org.apache.solr.client.solrj.SolrServerException;

import org.apache.solr.client.solrj.impl.CloudSolrServer;

import org.apache.solr.client.solrj.response.QueryResponse;

import org.apache.solr.common.SolrDocumentList;

import org.apache.solr.common.params.ModifiableSolrParams;

/**
 * Demo that runs one query against the {@code collection1} collection through
 * a ZooKeeper-aware SolrJ client and prints every returned document.
 */
public class SolrCloudSolrJSearcher {

    /**
     * Queries {@code cat:electronics} with the edismax parser, starting at
     * offset 0, and prints each result to standard output.
     *
     * @param args command-line arguments (not used)
     * @throws MalformedURLException declared by the original signature and kept for compatibility
     * @throws SolrServerException   if the query fails
     */
    public static void main(String[] args) throws MalformedURLException,
            SolrServerException {
        String zkHost = "localhost:2181";
        String defaultCollection = "collection1";
        CloudSolrServer solr = new CloudSolrServer(zkHost);
        try {
            solr.setDefaultCollection(defaultCollection);

            ModifiableSolrParams params = new ModifiableSolrParams();
            params.set("q", "cat:electronics");
            params.set("defType", "edismax");
            params.set("start", "0");

            QueryResponse response = solr.query(params);
            SolrDocumentList results = response.getResults();
            for (Object document : results) {
                System.out.println(document);
            }
        } finally {
            // Release the client's connections even when the query fails.
            solr.shutdown();
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: