二、lucene2.4的增删改查及其查询语法
2013-04-12 11:13
429 查看
1、工程结构图
2、排序的几种方式
0
3、增删改查索引
4、增删改查的测试
5、分页时所用到的bean
6、lucene的查询语法
7、lucene的工具方法
工程代码:http://download.csdn.net/detail/wxwzy738/5248905
2、排序的几种方式
0
3、增删改查索引
package cn.hj.lucene.dao;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

/**
 * Create / delete / update / search operations against a single Lucene index
 * directory. Every operation opens its own {@link IndexWriter} or
 * {@link IndexSearcher} and closes it before returning.
 */
public class IndexDao {

    /** Maximum number of hits requested from the searcher in one query. */
    private static final int MAX_HITS = 10000;

    /** Fragment size for highlighting and length of the fallback summary, in characters. */
    private static final int SUMMARY_LENGTH = 50;

    String indexPath = "D:\\Workspaces\\lucenedemo\\luceneIndex";

    // Alternative: Analyzer analyzer = new StandardAnalyzer();
    Analyzer analyzer = new MMAnalyzer(); // dictionary-based word segmentation

    /**
     * Adds a document to the index.
     *
     * @param doc the document to add
     * @throws RuntimeException wrapping any failure while opening the writer or adding the document
     */
    public void save(Document doc) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
            indexWriter.addDocument(doc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Deletes every document containing the given term.
     *
     * <p>A Term is the smallest unit of search: one keyword inside one field,
     * e.g. {@code new Term("title", "lucene")}, {@code new Term("id", "5")}
     * or {@code new Term("id", UUID)}.
     *
     * @param term identifies the documents to delete
     * @throws RuntimeException wrapping any failure while opening the writer or deleting
     */
    public void delete(Term term) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
            indexWriter.deleteDocuments(term);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Replaces the documents matching {@code term} with {@code doc}.
     * Equivalent to:
     *
     * <pre>
     * indexWriter.deleteDocuments(term);
     * indexWriter.addDocument(doc);
     * </pre>
     *
     * @param term identifies the documents to replace
     * @param doc the replacement document
     * @throws RuntimeException wrapping any failure while opening the writer or updating
     */
    public void update(Term term, Document doc) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
            indexWriter.updateDocument(term, doc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Parses {@code queryString} against the "name" and "content" fields and
     * returns one page of results.
     *
     * <p>Callers can derive the page count from the result:
     *
     * <pre>
     * totalPage = recordCount / pageSize;
     * if (recordCount % pageSize &gt; 0)
     *     totalPage++;
     * </pre>
     *
     * @param queryString query in Lucene query-parser syntax
     * @param firstResult zero-based index of the first hit to return
     * @param maxResults maximum number of hits to return
     * @return the total hit count plus the documents of the requested page
     * @throws RuntimeException wrapping any parse or search failure
     */
    public QueryResult search(String queryString, int firstResult, int maxResults) {
        try {
            // 1. Parse the text into a Query over both fields.
            String[] fields = { "name", "content" };
            Map<String, Float> boosts = new HashMap<String, Float>();
            boosts.put("name", 3f); // larger number = higher weight; "content" keeps the default 1.0f
            QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer, boosts);
            Query query = queryParser.parse(queryString);
            return search(query, firstResult, maxResults);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs {@code query} and returns one page of results with the "content"
     * field replaced by a highlighted summary.
     *
     * <p>Hits are restricted by a fixed filter to documents whose "size" lies
     * in [200, 1000] and are sorted by "size" ascending. At most
     * {@value #MAX_HITS} hits are fetched, so pages beyond that limit are empty
     * even though {@code recordCount} reports the full total.
     *
     * @param query the query to execute
     * @param firstResult zero-based index of the first hit to return; negative values are treated as 0
     * @param maxResults maximum number of hits to return
     * @return the total hit count plus the documents of the requested page
     * @throws RuntimeException wrapping any search failure
     */
    public QueryResult search(Query query, int firstResult, int maxResults) {
        IndexSearcher indexSearcher = null;
        try {
            // 2. Execute the query.
            indexSearcher = new IndexSearcher(indexPath);
            Filter filter = new RangeFilter("size", NumberTools.longToString(200),
                    NumberTools.longToString(1000), true, true);

            Sort sort = new Sort();
            sort.setSort(new SortField("size")); // ascending by default
            // Descending alternative: sort.setSort(new SortField("size", true));

            TopDocs topDocs = indexSearcher.search(query, filter, MAX_HITS, sort);
            int recordCount = topDocs.totalHits;
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;

            Highlighter highlighter = createHighlighter(query);

            // 3. Collect the requested page. The upper bound is the number of
            // hits actually fetched (totalHits may exceed it), and the sum is
            // computed as a long so huge maxResults values cannot overflow.
            int start = Math.max(0, firstResult);
            int end = (int) Math.min((long) firstResult + maxResults, (long) scoreDocs.length);
            List<Document> recordList = new ArrayList<Document>();
            for (int i = start; i < end; i++) {
                Document doc = indexSearcher.doc(scoreDocs[i].doc); // load by internal doc number
                highlightContent(highlighter, doc);
                recordList.add(doc);
            }

            return new QueryResult(recordCount, recordList);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Guard against null: if opening the searcher failed, an NPE thrown
            // here would replace the original exception.
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Builds a highlighter that wraps matched terms of {@code query} in a red font tag. */
    private Highlighter createHighlighter(Query query) {
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        Fragmenter fragmenter = new SimpleFragmenter(SUMMARY_LENGTH);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    /**
     * Replaces the "content" value of {@code doc} with its best highlighted
     * fragment, or with the leading characters when no keyword occurs in it.
     * Documents without a stored "content" field are left untouched.
     */
    private void highlightContent(Highlighter highlighter, Document doc) throws Exception {
        String content = doc.get("content");
        if (content == null) {
            return;
        }
        // getBestFragment returns null when the text contains none of the query terms.
        String summary = highlighter.getBestFragment(analyzer, "content", content);
        if (summary == null) {
            summary = content.substring(0, Math.min(SUMMARY_LENGTH, content.length()));
        }
        doc.getField("content").setValue(summary);
    }

    /**
     * Closes {@code indexWriter} if it was opened. A failure while closing is
     * reported on stderr and not rethrown, so it cannot hide an exception
     * already propagating from the write operation.
     */
    private void closeWriter(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
4、增删改查的测试
package cn.hj.lucene.dao; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; import org.junit.Test; import cn.hj.lucene.utils.File2DocumentUtils; public class IndexDaoTest { String filePath = "D:\\Workspaces\\lucenedemo\\luceneDatasource\\IndexWriter addDocument's a javadoc .txt"; String filePath2 = "D:\\Workspaces\\lucenedemo\\luceneDatasource\\小笑话_总统的房间 Room .txt"; IndexDao indexDao = new IndexDao(); @Test public void testSave() { Document doc = File2DocumentUtils.file2Document(filePath); doc.setBoost(3f); indexDao.save(doc); Document doc2 = File2DocumentUtils.file2Document(filePath2); // doc2.setBoost(1.0f); indexDao.save(doc2); } @Test public void testDelete() { Term term = new Term("path", filePath); indexDao.delete(term); } @Test public void testUpdate() { Term term = new Term("path", filePath); Document doc = File2DocumentUtils.file2Document(filePath); doc.getField("content").setValue("这是更新后的文件内容"); indexDao.update(term, doc); } @Test public void testSearch() { // String queryString = "IndexWriter"; // String queryString = "房间"; // String queryString = "笑话"; String queryString = "room"; // String queryString = "content:绅士"; QueryResult qr = indexDao.search(queryString, 0, 10); System.out.println("总共有【" + qr.getRecordCount() + "】条匹配结果"); for (Document doc : qr.getRecordList()) { File2DocumentUtils.printDocument(doc); } } }
5、分页时所用到的bean
package cn.hj.lucene.dao;

import java.util.List;

import org.apache.lucene.document.Document;

/**
 * One page of search results: the total number of matches together with the
 * documents belonging to the requested page.
 */
public class QueryResult {

    /** Total number of matching records across all pages. */
    private int recordCount;

    /** Documents of the current page. */
    private List<Document> recordList;

    /**
     * @param recordCount total number of matching records
     * @param recordList documents of the current page
     */
    public QueryResult(int recordCount, List<Document> recordList) {
        this.recordList = recordList;
        this.recordCount = recordCount;
    }

    /** @return total number of matching records */
    public int getRecordCount() {
        return this.recordCount;
    }

    /** @param recordCount total number of matching records */
    public void setRecordCount(int recordCount) {
        this.recordCount = recordCount;
    }

    /** @return documents of the current page */
    public List<Document> getRecordList() {
        return this.recordList;
    }

    /** @param recordList documents of the current page */
    public void setRecordList(List<Document> recordList) {
        this.recordList = recordList;
    }
}
6、lucene的查询语法
package cn.hj.lucene.query; import org.apache.lucene.document.Document; import org.apache.lucene.document.NumberTools; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.RangeQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.BooleanClause.Occur; import org.junit.Test; import cn.hj.lucene.dao.IndexDao; import cn.hj.lucene.dao.QueryResult; import cn.hj.lucene.utils.File2DocumentUtils; public class QueryTest { IndexDao indexDao = new IndexDao(); /** * 查询并且打印结果 * @param query */ private void queryAndPrintResult(Query query) { QueryResult qr = indexDao.search(query, 0, 100); System.out.println("总共有【"+qr.getRecordCount()+"】条匹配结果"); for(Document doc : qr.getRecordList()){ File2DocumentUtils.printDocument(doc); } } /** * 关键字查询 */ @Test public void testTermQuery(){ Term term = new Term("name","room"); Query query = new TermQuery(term); queryAndPrintResult(query); } /** * 范围查询 * 索引和搜索的时候搜索参数需要有一个相同的宽度 * 包含边界:size:[0000000000001e TO 000000000000rs] * * 不包含边界:size:{0000000000001e TO 000000000000rs} */ @Test public void testRangeQuery() { Term lowerTerm = new Term("size", NumberTools.longToString(50)); Term upperTerm = new Term("size", NumberTools.longToString(1000)); Query query = new RangeQuery(lowerTerm, upperTerm, false); queryAndPrintResult(query); } // public static void main(String[] args) { // System.out.println(Long.MAX_VALUE); // System.out.println(NumberTools.longToString(1000)); // System.out.println(NumberTools.stringToLong("000000000000rs")); // // System.out.println(DateTools.dateToString(new Date(), Resolution.DAY)); // System.out.println(DateTools.dateToString(new Date(), Resolution.MINUTE)); // System.out.println(DateTools.dateToString(new Date(), Resolution.SECOND)); // } /** * 通配符查询 * * '?' 
代表一个字符, '*' 代表0个或多个字符 * * name:房* * * name:*o* * * name:roo? */ @Test public void testWildcardQuery() { Term term = new Term("name", "roo?"); // Term term = new Term("name", "ro*"); // 前缀查询 PrefixQuery // Term term = new Term("name", "*o*"); // Term term = new Term("name", "房*"); Query query = new WildcardQuery(term); queryAndPrintResult(query); } /** * 短语查询 * * content:"? 绅士 ? ? 饭店" * * content:"绅士 饭店"~2 */ @Test public void testPhraseQuery() { PhraseQuery phraseQuery = new PhraseQuery(); //表示饭店和绅士中间个两个词语隔(2,3),这个位置是相对的,只要中间隔两个即可 // phraseQuery.add(new Term("content", "绅士"), 1); // phraseQuery.add(new Term("content", "饭店"), 4); //现在我不知道查多少个,但是我知道大约查几个 phraseQuery.add(new Term("content", "绅士"));//第三个属性位置不填 -> phraseQuery.add(new Term("content", "饭店"));//->默认是012345递增 //这个表示上面两个词最多中间隔了几个词 phraseQuery.setSlop(2); queryAndPrintResult(phraseQuery); } /** * +content:"绅士 饭店"~2 -size:[000000000000dw TO 000000000000rs] * * +content:"绅士 饭店"~2 +size:[000000000000dw TO 000000000000rs] * * content:"绅士 饭店"~2 size:[000000000000dw TO 000000000000rs] * * +content:"绅士 饭店"~2 size:[000000000000dw TO 000000000000rs] */ @Test public void testBooleanQuery() { // 条件1 PhraseQuery query1 = new PhraseQuery(); query1.add(new Term("content", "绅士")); query1.add(new Term("content", "饭店")); query1.setSlop(2); // 条件2 Term lowerTerm = new Term("size", NumberTools.longToString(500)); Term upperTerm = new Term("size", NumberTools.longToString(1000)); Query query2 = new RangeQuery(lowerTerm, upperTerm, true); // 组合 BooleanQuery boolQuery = new BooleanQuery(); boolQuery.add(query1, Occur.MUST); boolQuery.add(query2, Occur.SHOULD); queryAndPrintResult(boolQuery); } /** * 有+-的方式 +表示MUST -表示MUST_NOT 什么都不写表示SHOULD * 也有AND OR NOT的方式 */ @Test public void testQueryString() { // String queryString = "+content:\"绅士 饭店\"~2 -size:[000000000000dw TO 000000000000rs]"; // String queryString = "content:\"绅士 饭店\"~2 AND size:[000000000000dw TO 000000000000rs]"; // String queryString = "content:\"绅士 饭店\"~2 OR 
size:[000000000000dw TO 000000000000rs]"; // String queryString = "(content:\"绅士 饭店\"~2 NOT size:[000000000000dw TO 000000000000rs])"; // String queryString = "-content:\"绅士 饭店\"~2 AND -size:[000000000000dw TO 000000000000rs]"; // String queryString = "-content:\"绅士 饭店\"~2 OR -size:[000000000000dw TO 000000000000rs]"; String queryString = "-content:\"绅士 饭店\"~2 NOT -size:[000000000000dw TO 000000000000rs]"; QueryResult qr = indexDao.search(queryString, 0, 10); System.out.println("总共有【" + qr.getRecordCount() + "】条匹配结果"); for (Document doc : qr.getRecordList()) { File2DocumentUtils.printDocument(doc); } } }
7、lucene的工具方法
package cn.hj.lucene.utils;

import java.io.File;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;

/**
 * Helpers for turning a file into a Lucene {@link Document} and for printing
 * a document returned by a search.
 */
public class File2DocumentUtils {

    /**
     * Converts a file into a Document with four stored fields:
     * "name" (analyzed), "content" (analyzed), "size" and "path"
     * (both indexed verbatim).
     *
     * @param path path of the file to convert; stored unchanged in the "path" field
     * @return the document describing the file
     * @throws RuntimeException wrapping the IOException when the file cannot be read
     */
    public static Document file2Document(String path) {
        File file = new File(path);

        String content;
        try {
            content = FileUtils.readFileToString(file);
        } catch (IOException e) {
            // A document without "content" would be indexed silently and
            // break highlighting later, so fail instead.
            throw new RuntimeException(e);
        }

        Document doc = new Document();
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        doc.add(new Field("content", content, Store.YES, Index.ANALYZED));
        // Size is the file length, encoded with NumberTools so that it has a
        // fixed width: range filters/queries compare it as a string, and
        // printDocument decodes it with NumberTools.stringToLong.
        doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
        // Must be indexed (not Index.NO) so that new Term("path", path) can
        // match it for delete and update.
        doc.add(new Field("path", path, Store.YES, Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Prints the name, content, decoded size and path of a search result.
     *
     * @param doc the document to print; its "size" field must hold a
     *            NumberTools-encoded long
     */
    public static void printDocument(Document doc) {
        // A field value can be read either via doc.getField("name").stringValue()
        // or, as done here, via doc.get("name").
        System.out.println("----------------------------------");
        System.out.println("name = " + doc.get("name"));
        System.out.println("content = " + doc.get("content"));
        System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));
        System.out.println("path = " + doc.get("path"));
    }
}
工程代码:http://download.csdn.net/detail/wxwzy738/5248905
相关文章推荐
- 一、lucene2.4的创建和查询及其分词显示
- Lucene查询语法简介
- Lucene学习总结之八:Lucene的查询语法,JavaCC及QueryParser(2)
- LINQ查询操作符之Select、Where、OrderBy、OrderByDescending、GroupBy、Join、GroupJoin及其对应的查询语法
- kibana使用的lucene查询语法
- 5、步步为营VS 2008 + .NET 3.5(5) - LINQ查询操作符之Select、Where、OrderBy、OrderByDescending、GroupBy、Join、GroupJoin及其对应的查询语法
- Lucene学习总结之八:Lucene的查询语法,JavaCC及QueryParser(1)
- 步步为营VS 2008 + .NET 3.5(5) - LINQ查询操作符之Select、Where、OrderBy、OrderByDescending、GroupBy、Join、GroupJoin及其对应的查询语法
- Lucene学习(四):查询语法详解
- lucene的查询语法
- Lucene学习总结之八:Lucene的查询语法,JavaCC及QueryParser(2)
- Lucene查询语法详解
- ELK:kibana使用的lucene查询语法
- LINQ查询操作符之Select、Where、OrderBy、OrderByDescending、GroupBy、Join、GroupJoin及其对应的查询语法
- Lucene查询语法详解
- Lucene Syntax (lucene查询语法详解)
- Lucene的查询语法
- lucene 范围查询及其原理
- lucene语法 lucene查询语法详解
- Lucene学习总结之八:Lucene的查询语法,JavaCC及QueryParser(1)