Lucene索引创建、查询与高亮
2015-06-23 16:50
274 查看
前面我们介绍了Lucene的基本使用及中文分词,下面我们以一个实例来看一下Lucene的使用方式。
1.创建实例对象
实例可以是文本文件、网页或数据库数据等,读取后创建索引文件。这里以People对象作为获取到的数据实体:
2.创建索引
根据原数据生成索引文件
3.搜索
通过关键词,同时搜索多个field。
1.lucene教程
2.Lucene分页查询
3.Lucene3.5.0以上(包含3.5.0)版本自定义日期排序
4.Lucene搜索方式大合集
5.lucene 查询+分页+排序
6.Lucene 3.6 中文分词、分页查询、高亮显示等
1.创建实例对象
实例可以是文本文件、网页或数据库数据等,读取后创建索引文件。这里以People对象作为获取到的数据实体:
package cn.slimsmart.lucene.demo.example1;

import java.util.ArrayList;
import java.util.List;

/**
 * Simple mutable data holder used as the source record for the indexing demo.
 * Each instance carries an id, a name and a free-text description.
 */
public class People {

    private String id;
    private String name;
    private String desc;

    /**
     * Builds the fixed set of three sample records used by the demo.
     *
     * @return a new, mutable list containing the sample records in a fixed order
     */
    public static List<People> getInitList() {
        List<People> samples = new ArrayList<People>();
        samples.add(sample("10001", "张三", "张三是个农民,勤劳致富,奔小康"));
        samples.add(sample("20001", "李四", "李四是个企业家,白手起家,致富一方"));
        samples.add(sample("11111", "王五", "王五好吃懒做,溜须拍马,跟着李四,也过着小康的日子"));
        return samples;
    }

    /** Creates one record and populates it through the public setters. */
    private static People sample(String id, String name, String desc) {
        People person = new People();
        person.setId(id);
        person.setName(name);
        person.setDesc(desc);
        return person;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getDesc() {
        return this.desc;
    }

    public void setDesc(String desc) {
        this.desc = desc;
    }
}
2.创建索引
根据原数据生成索引文件
package cn.slimsmart.lucene.demo.example1;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Creates and maintains the on-disk Lucene index built from {@link People#getInitList()}.
 */
public class CreateIndex {

    /** Directory on disk where the index files are written. */
    private static final String INDEX_DIR = "src/main/resources";

    /**
     * Writes one document per sample record into the file-system index and commits.
     *
     * <p>An in-memory {@code RAMDirectory} could be filled first and merged into this
     * writer with {@code addIndexes(...)}; this demo writes to disk directly.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    public static void write() throws Exception {
        // try-with-resources closes the writer first, then the directory, even when
        // adding or committing fails, so the write lock is never left behind.
        try (Directory fsDirectory = FSDirectory.open(new File(INDEX_DIR));
             IndexWriter fsIndexWriter = new IndexWriter(fsDirectory, getConfig())) {
            fsIndexWriter.addDocuments(getData());
            fsIndexWriter.commit();
        }
    }

    /**
     * Builds the writer configuration shared by every index operation in this class.
     *
     * @return a configuration using the IK analyzer and {@code CREATE_OR_APPEND} open mode
     */
    public static IndexWriterConfig getConfig() {
        Analyzer analyzer = new IKAnalyzer(true); // tokenizer used for indexing
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_4, analyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        return config;
    }

    /**
     * Converts the sample records into Lucene documents.
     *
     * @return one document per record, with stored fields {@code id}, {@code name} and {@code desc}
     */
    public static Iterable<? extends Iterable<? extends IndexableField>> getData() {
        List<Document> docs = new ArrayList<Document>();
        for (People p : People.getInitList()) {
            Document doc = new Document();
            // StoredField would only store the value without indexing it;
            // IntField / LongField are the field types meant for sorting and filtering.
            doc.add(new StringField("id", p.getId(), Field.Store.YES));
            doc.add(new StringField("name", p.getName(), Field.Store.YES));
            doc.add(new TextField("desc", p.getDesc(), Field.Store.YES));
            docs.add(doc);
        }
        return docs;
    }

    /**
     * Deletes every document whose {@code id} field equals the given value and commits.
     *
     * <p>I/O failures are reported on standard error and not rethrown, as before.
     * To replace a document instead of removing it, use
     * {@code writer.updateDocument(term, doc)}.
     *
     * @param id value of the {@code id} field of the documents to remove
     */
    public void deleteDoc(String id) {
        try (Directory dir = FSDirectory.open(new File(INDEX_DIR));
             IndexWriter writer = new IndexWriter(dir, getConfig())) {
            writer.deleteDocuments(new Term("id", id));
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
3.搜索
通过关键词,同时搜索多个field。
package cn.slimsmart.lucene.demo.example1;

import java.io.File;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Runs a keyword query against several indexed fields at once and prints each hit's
 * description, both raw and with the matched terms highlighted.
 */
public class SearchKeyword {

    /** Directory on disk holding the index files. */
    private static final String INDEX_DIR = "src/main/resources";

    /**
     * Parses the query string against the {@code id}, {@code name} and {@code desc}
     * fields, searches the index and prints up to 1000 hits to standard output.
     *
     * @param queryString query text in Lucene classic query-parser syntax
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public static void search(String queryString) throws Exception {
        String[] fields = { "id", "name", "desc" };
        Analyzer analyzer = new IKAnalyzer(true); // tokenizer used for querying
        QueryParser queryParse = new MultiFieldQueryParser(fields, analyzer);
        queryParse.setPhraseSlop(3);
        Query query = queryParse.parse(queryString);

        // Highlighting setup. A single-term query could be highlighted the same way:
        //   Term term = new Term(USERNAME, content); query = new TermQuery(term);
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        Fragmenter fragmenter = new SimpleFragmenter(100); // fragment size in characters
        highlighter.setTextFragmenter(fragmenter);

        // try-with-resources closes the reader first, then the directory, even when
        // searching or highlighting throws.
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             DirectoryReader directoryReader = DirectoryReader.open(directory)) {
            IndexSearcher isearcher = new IndexSearcher(directoryReader);
            /*
             * Sorting:
             *   Sort sort = new Sort(new SortField("birthdays", Type.STRING, false));
             *   TopDocs topDocs = isearcher.search(query, filter, topnum, sort)
             *
             * Paging:
             *   TopFieldCollector c = TopFieldCollector.create(sort, first+end, false, false, false, false);
             *   isearcher.search(query, c);
             *   ScoreDoc[] hits = c.topDocs(first, end).scoreDocs;
             */
            TopDocs topDocs = isearcher.search(query, null, 1000);
            System.out.println("总共有[" + topDocs.totalHits + "]条匹配结果");
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                System.out.println("-------------------------");
                Document doc = isearcher.doc(scoreDoc.doc);
                String text = doc.get("desc");
                System.out.println(text);

                // Variant 1: highlight from an explicitly created token stream.
                TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(text));
                System.out.println(orOriginal(highlighter.getBestFragment(tokenStream, text), text));

                // Variant 2: let the highlighter create the token stream from the analyzer.
                System.out.println(orOriginal(highlighter.getBestFragment(analyzer, "desc", text), text));
                System.out.println("-------------------------");
            }
        }
    }

    /**
     * Returns the highlighted fragment, or the unhighlighted text when the highlighter
     * found no query term in this field (for example when the hit matched on
     * {@code id} or {@code name} only) and therefore returned {@code null}.
     */
    private static String orOriginal(String fragment, String original) {
        return fragment != null ? fragment : original;
    }
}
运行:
// Demo driver: builds the index with CreateIndex.write() first, then searches it for the keyword "小康".
System.out.println("创建索引开始"); CreateIndex.write(); System.out.println("搜索--Keyword"); SearchKeyword.search("小康");可以看到结果:
创建索引开始 加载扩展词典:ext.dic 加载扩展停止词典:stopword.dic 加载扩展停止词典:CH_stopword.dic 搜索--Keyword 总共有[1]条匹配结果 ------------------------- 王五好吃懒做,溜须拍马,跟着李四,也过着小康的日子 王五好吃懒做,溜须拍马,跟着李四,也过着<font color='red'>小康</font>的日子 王五好吃懒做,溜须拍马,跟着李四,也过着<font color='red'>小康</font>的日子 -------------------------参考文章:
1.lucene教程
2.Lucene分页查询
3.Lucene3.5.0以上(包含3.5.0)版本自定义日期排序
4.Lucene搜索方式大合集
5.lucene 查询+分页+排序
6.Lucene 3.6 中文分词、分页查询、高亮显示等
相关文章推荐
- SQL Server全文检索查询浅析
- java Lucene 中自定义排序的实现
- 从零开始使用Hubbledotnet进行全文搜索-前言
- zg手册 之 Mysql 开发(1)-- 中文全文检索插件开发
- 最老程序员创业札记:全文检索、数据挖掘、推荐引擎应用2
- Lucene整合"庖丁解牛"中文分词包
- JAVA lucene全文检索工具包的理解与使用 分享
- Lucene:基于Java的全文检索引擎简介
- 使用Lucene 3.3.0的结构遍历TokenStream的内容.
- hadoop+lucene+web 综合小demo
- Lucene 学习笔记(一)
- lucene集成IK实现中文分词检索
- lucene4.2 + IKanalyzer2012FF_u1简单示例 .
- lucene solr在tomcat中的配置
- Lucene 3.6 contrib 学习总结
- lucene全文检索学习记录,附带源码——三种实现,超全超细致
- elasticsearch安装与调试
- VSB2008网站群内容管理系统(CMS系统)
- Lucene 2.0.0下载安装及测试
- ElasticSearch+Solr几个案例笔记