Lucene+ik的分页和多条件模糊交集搜索
2015-09-23 18:02
387 查看
jar包支持http://download.csdn.net/detail/isresultxal/9133977
package com.sgfm.datacenter.sys;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.QueryParser.Operator;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.sgfm.datacenter.dao.product.ProductDao;
import com.sgfm.datacenter.util.CommUtil;
import com.sgfm.datacenter.util.SpringContext;

/**
 * Lucene 3.6 + IK-analyzer helpers for the product catalogue:
 * {@link #createProductIndex()} (re)builds an on-disk index from the DB,
 * {@link #searchProductIndexMore(String)} runs a multi-field fuzzy search,
 * and {@link #searchProductIndexMorePage(String, String, int, int)} runs a
 * city-filtered, paged search whose keyword terms are AND-combined
 * ("intersection" search).
 *
 * NOTE(review): the index paths are hard-coded Windows paths — consider
 * moving them to configuration.
 */
public class LuceneData {

    /** On-disk location of the product index. */
    static String productIndex = "D:\\luceneIndex\\product";
    /** On-disk location of the store index (not used by this class). */
    static String storeIndex = "D:\\luceneIndex\\store";

    /**
     * Null-safe string conversion: a null DB column becomes "" instead of
     * throwing an NPE that would silently abort the whole index build.
     */
    private static String asText(Object value) {
        return value == null ? "" : value.toString();
    }

    /**
     * Rebuilds the product index from every row returned by
     * {@code ProductDao.findAllProductLucene()}.
     *
     * The writer is opened with {@link OpenMode#CREATE} so the old index is
     * overwritten; without it new documents would be appended after the stale
     * ones and searches would return duplicates.
     */
    public static void createProductIndex() {
        IndexWriter writer = null;
        Directory dir = null;
        try {
            // FSDirectory = on disk; RAMDirectory would vanish on shutdown.
            File indexdir = new File(productIndex);
            dir = FSDirectory.open(indexdir);

            // IK analyzer for Chinese word segmentation.
            Analyzer analyzer = new IKAnalyzer();

            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            iwc.setOpenMode(OpenMode.CREATE); // truncate any existing index
            writer = new IndexWriter(dir, iwc);

            // CREATE already truncates; deleteAll()+commit() kept from the
            // original as belt-and-braces for readers opened mid-rebuild.
            writer.deleteAll();
            writer.commit();

            ProductDao pd = (ProductDao) SpringContext.getBean("productDao");
            List<Map> productList = pd.findAllProductLucene();
            for (int i = 0; i < productList.size(); i++) {
                Map row = productList.get(i);
                Document doc = new Document();
                // Exact-match / display-only fields: stored, not analyzed.
                doc.add(new Field("pid", asText(row.get("PID")), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("pno", asText(row.get("P_NO")), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("city", asText(row.get("CITY")), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                // Full-text searchable fields: analyzed by IK.
                doc.add(new Field("pname", asText(row.get("PNAME")), Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("bname", asText(row.get("BNAME")), Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("product_dec", asText(row.get("PRODUCT_DEC")), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("count", asText(row.get("COUNT")), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("img_s", asText(row.get("IMG_S")), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("shop_price", asText(row.get("SHOP_PRICE")), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("market_price", asText(row.get("MARKET_PRICE")), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("discount", asText(row.get("DISCOUNT")), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                writer.addDocument(doc);
            }
        } catch (Exception e) {
            // Was an empty TODO catch: a failed rebuild went unnoticed.
            e.printStackTrace();
        } finally {
            // Always release the writer; a leaked write.lock would make every
            // subsequent rebuild fail.
            if (writer != null) {
                try { writer.close(); } catch (Exception e) { e.printStackTrace(); }
            }
            if (dir != null) {
                try { dir.close(); } catch (Exception e) { e.printStackTrace(); }
            }
        }
    }

    /**
     * Multi-field search: matches {@code keyWord} against "pname" OR "bname"
     * (BooleanClause.Occur.SHOULD = OR, MUST = AND, MUST_NOT = NOT) and
     * returns up to 100 matching Documents.
     *
     * @param keyWord user search text, parsed by the IK analyzer
     * @return matching Lucene Documents (empty on error)
     */
    public static ArrayList searchProductIndexMore(String keyWord) {
        ArrayList list = new ArrayList();
        Directory dir = null;
        IndexSearcher searcher = null;
        try {
            File indexdir = new File(productIndex);
            dir = FSDirectory.open(indexdir);
            Analyzer analyzer = new IKAnalyzer(true);
            searcher = new IndexSearcher(dir);

            BooleanQuery booleanQuery = new BooleanQuery();
            QueryParser parser1 = new QueryParser(Version.LUCENE_36, "pname", analyzer);
            Query titleQuery = parser1.parse(keyWord);
            booleanQuery.add(titleQuery, BooleanClause.Occur.SHOULD);
            QueryParser parser2 = new QueryParser(Version.LUCENE_36, "bname", analyzer);
            Query contentQuery = parser2.parse(keyWord);
            booleanQuery.add(contentQuery, BooleanClause.Occur.SHOULD);

            TopDocs topDocs = searcher.search(booleanQuery, 100);
            ScoreDoc[] hits = topDocs.scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                int DocId = hits[i].doc;
                Document doc = searcher.doc(DocId);
                System.out.println("pid:" + doc.get("pid") + ";pname:" + doc.get("pname") + ";bname:" + doc.get("bname") + ";city:" + doc.get("city"));
                list.add(doc);
            }
            System.out.println("满足结果记录条数:" + topDocs.totalHits);
        } catch (Exception e) {
            // Was an empty TODO catch; surface the failure.
            e.printStackTrace();
        } finally {
            // Original leaked both the searcher and the directory handle.
            if (searcher != null) {
                try { searcher.close(); } catch (Exception e) { e.printStackTrace(); }
            }
            if (dir != null) {
                try { dir.close(); } catch (Exception e) { e.printStackTrace(); }
            }
        }
        return list;
    }

    /**
     * City-filtered, paged product search.
     *
     * Query shape: city is a mandatory exact term (MUST); when a keyword is
     * given, a sub-query requires ALL of its analyzed terms to match pname
     * or bname (Operator.AND = intersection of the segmented terms), or the
     * upper-cased keyword as a wildcard against the product number.
     *
     * @param city     exact (not analyzed) city term — MUST match
     * @param keyWord  search text; "" means city-only filtering
     * @param pageSize rows per page (values &lt; 1 are clamped to 1)
     * @param curpage  1-based page number (values &lt; 1 are clamped to 1)
     * @return one Map per hit; the FIRST map additionally carries the
     *         "rowCount" (total hits) and "pages" (total pages) entries —
     *         callers rely on this, so the quirk is preserved
     */
    public static ArrayList searchProductIndexMorePage(String city, String keyWord, int pageSize, int curpage) {
        ArrayList list = new ArrayList();
        Directory dir = null;
        IndexSearcher searcher = null;
        try {
            // Guard against bad paging input: curpage < 1 would produce a
            // negative collector offset and an exception from Lucene.
            if (pageSize < 1) {
                pageSize = 1;
            }
            if (curpage < 1) {
                curpage = 1;
            }
            int start = (curpage - 1) * pageSize; // first hit of this page
            int rowCount; // total number of hits
            int pages;    // total number of pages

            File indexdir = new File(productIndex);
            dir = FSDirectory.open(indexdir);
            // NOTE: the original also opened an IndexReader here that was
            // never used and never closed (a leak); it has been removed.
            Analyzer analyzer = new IKAnalyzer(true);
            searcher = new IndexSearcher(dir);

            BooleanQuery booleanQuery = new BooleanQuery();
            if (keyWord.equals("")) {
                // City-only filter.
                booleanQuery.add(new TermQuery(new Term("city", city)), BooleanClause.Occur.MUST);
            } else {
                BooleanQuery q1 = new BooleanQuery();
                QueryParser parser1 = new QueryParser(Version.LUCENE_36, "pname", analyzer);
                parser1.setDefaultOperator(Operator.AND); // all segmented terms must match
                Query pnameQuery = parser1.parse(keyWord);
                QueryParser parser2 = new QueryParser(Version.LUCENE_36, "bname", analyzer);
                parser2.setDefaultOperator(Operator.AND); // all segmented terms must match
                Query bnameQuery = parser2.parse(keyWord);
                q1.add(pnameQuery, BooleanClause.Occur.SHOULD);
                q1.add(bnameQuery, BooleanClause.Occur.SHOULD);
                // Product-number lookup: "pno" is indexed un-analyzed in upper
                // case, so the keyword is upper-cased for the wildcard match.
                q1.add(new WildcardQuery(new Term("pno", "*" + keyWord.toUpperCase() + "*")), BooleanClause.Occur.SHOULD);
                booleanQuery.add(new TermQuery(new Term("city", city)), BooleanClause.Occur.MUST);
                booleanQuery.add(q1, BooleanClause.Occur.MUST);
            }

            // Collect just enough hits to serve the requested page.
            int hm = start + pageSize;
            TopScoreDocCollector res = TopScoreDocCollector.create(hm, false);
            searcher.search(booleanQuery, res);
            rowCount = res.getTotalHits();
            pages = (rowCount - 1) / pageSize + 1; // NOTE: yields 1 even when rowCount == 0 (kept for compatibility)
            System.out.println("rowCount:" + rowCount);
            System.out.println("pages:" + pages);

            TopDocs tds = res.topDocs(start, pageSize);
            ScoreDoc[] hits = tds.scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                int DocId = hits[i].doc;
                Document doc = searcher.doc(DocId);
                Map productMap = new HashMap();
                if (i == 0) {
                    // Paging metadata piggybacks on the first row.
                    productMap.put("rowCount", rowCount);
                    productMap.put("pages", pages);
                }
                productMap.put("pid", doc.get("pid"));
                productMap.put("pname", doc.get("pname"));
                productMap.put("bname", doc.get("bname"));
                productMap.put("product_dec", doc.get("product_dec"));
                productMap.put("count", doc.get("count"));
                productMap.put("img_s", doc.get("img_s"));
                productMap.put("shop_price", doc.get("shop_price"));
                productMap.put("market_price", doc.get("market_price"));
                productMap.put("discount", doc.get("discount"));
                list.add(productMap);
            }
        } catch (Exception e) {
            // Was an empty TODO catch; surface the failure.
            e.printStackTrace();
        } finally {
            // Original leaked both the searcher and the directory handle.
            if (searcher != null) {
                try { searcher.close(); } catch (Exception e) { e.printStackTrace(); }
            }
            if (dir != null) {
                try { dir.close(); } catch (Exception e) { e.printStackTrace(); }
            }
        }
        return list;
    }
}
相关文章推荐
- 项目经验总结
- JS调用中文乱码怎么办
- eclipse上cvs服务器端的配置
- mysql replication(主从复制)(三)GTIDs
- nyoj--44--子串和(动态规划)
- 生活中面向对象的实例
- android studio编译项目,9 patch图片报错Crunching Cruncher
- 技术博客网址
- 自定义View----Android九宫格手势密码解锁
- 2015年9月10-11日,杨学明老师《IPD DRY RUN》专题培训在武汉某上市企业成功举办!
- nyoj--44--子串和(动态规划)
- 笨蛋难题四
- 如何在面试中发现优秀程序员
- UVA - 1586 Molar mass
- 第四周项目三 数据结构实践(二)——单链表:连接
- 常见异常备忘
- android工具下载
- Android圆角button
- HDU 2955 Robberies(01背包+概率)
- 平台部署