lucene操作
2017-05-28 12:51
84 查看
package com.java.lucene;

import java.io.StringReader;
import java.nio.file.Paths;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.java.entity.Blog;
import com.java.util.DateUtil;
import com.java.util.StringUtil;

/**
 * Maintains and queries the Lucene full-text index of blog posts.
 *
 * <p>Every operation opens the on-disk index, does its work and releases all
 * Lucene resources (directory, analyzer, writer/reader) before returning, even
 * when an exception is thrown. No state is kept between calls.
 *
 * @author Administrator
 */
public class BlogIndex {

    /** File-system location of the index. */
    private static final String INDEX_PATH = "d://lucene";

    /** Maximum number of hits returned by a search. */
    private static final int MAX_HITS = 100;

    /** Length of the plain content excerpt used when no highlighted fragment is found. */
    private static final int SUMMARY_LENGTH = 200;

    // NOTE(review): the original source called SimpleHTMLFormatter with a single
    // empty string, which does not compile (the class only offers a no-arg and a
    // (preTag, postTag) constructor). The tags below are a reconstruction;
    // adjust them to whatever markup the result page expects.
    private static final String HIGHLIGHT_PRE_TAG = "<b><font color='red'>";
    private static final String HIGHLIGHT_POST_TAG = "</font></b>";

    /**
     * Adds a blog post to the index.
     *
     * @param blog the post to index
     * @throws Exception if the index cannot be opened or written
     */
    public void addIndex(Blog blog) throws Exception {
        // Resources are closed in reverse order: writer (which commits), analyzer, directory.
        try (Directory directory = openDirectory();
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            writer.addDocument(toDocument(blog));
        }
    }

    /**
     * Removes the document whose {@code id} field equals {@code blogId}.
     *
     * @param blogId id of the post to remove, as stored in the {@code id} field
     * @throws Exception if the index cannot be opened or written
     */
    public void deleteIndex(String blogId) throws Exception {
        try (Directory directory = openDirectory();
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            // "id" is indexed as an un-tokenized StringField, so an exact Term match works.
            writer.deleteDocuments(new Term("id", blogId));
            // Physically expunge the deleted documents instead of leaving them marked.
            writer.forceMergeDeletes();
            writer.commit();
        }
    }

    /**
     * Replaces the indexed document of a blog post with a freshly built one.
     *
     * @param blog the post whose index entry should be replaced
     * @throws Exception if the index cannot be opened or written
     */
    public void updateIndex(Blog blog) throws Exception {
        try (Directory directory = openDirectory();
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            writer.updateDocument(new Term("id", String.valueOf(blog.getId())), toDocument(blog));
        }
    }

    /**
     * Searches the {@code title} and {@code content} fields for {@code q} and
     * returns up to {@value #MAX_HITS} matching posts, best match first.
     *
     * <p>Each returned {@link Blog} carries its id, release date string, and a
     * title and content in which matched terms are wrapped in highlight markup.
     * When no fragment can be highlighted, the stored title is used as is and
     * the content is cut to at most {@value #SUMMARY_LENGTH} characters.
     *
     * @param q query string in Lucene classic query-parser syntax
     * @return the matching posts; empty when {@code q} is {@code null} or blank
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public List<Blog> searchBlog(String q) throws Exception {
        List<Blog> blogList = new LinkedList<>();
        if (q == null || q.trim().isEmpty()) {
            // Nothing to search for; avoid opening the index or failing in the parser.
            return blogList;
        }

        try (Directory directory = openDirectory();
                IndexReader reader = DirectoryReader.open(directory);
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
            IndexSearcher searcher = new IndexSearcher(reader);

            Query titleQuery = new QueryParser("title", analyzer).parse(q);
            Query contentQuery = new QueryParser("content", analyzer).parse(q);

            // SHOULD = OR (MUST = AND, MUST_NOT = NOT): a hit in either field is enough.
            BooleanQuery.Builder combined = new BooleanQuery.Builder();
            combined.add(titleQuery, BooleanClause.Occur.SHOULD);
            combined.add(contentQuery, BooleanClause.Occur.SHOULD);
            TopDocs hits = searcher.search(combined.build(), MAX_HITS);

            // The scorer is built from the title query only, as in the original code.
            QueryScorer scorer = new QueryScorer(titleQuery);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            SimpleHTMLFormatter formatter =
                    new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.setTextFragmenter(fragmenter);

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                // scoreDoc.doc is Lucene's internal document number, not the blog id.
                Document doc = searcher.doc(scoreDoc.doc);

                Blog blog = new Blog();
                blog.setId(Integer.parseInt(doc.get("id")));
                blog.setReleaseDateStr(doc.get("releaseDate"));

                String title = doc.get("title");
                // Escape the stored content so leftover markup is shown as text
                // and cannot break the result page.
                String content = StringEscapeUtils.escapeHtml(doc.get("content"));

                if (title != null) {
                    String highlightedTitle = highlight(highlighter, analyzer, "title", title);
                    blog.setTitle(StringUtil.isEmpty(highlightedTitle) ? title : highlightedTitle);
                }
                if (content != null) {
                    String highlightedContent =
                            highlight(highlighter, analyzer, "content", content);
                    if (StringUtil.isEmpty(highlightedContent)) {
                        blog.setContent(content.length() <= SUMMARY_LENGTH
                                ? content
                                : content.substring(0, SUMMARY_LENGTH));
                    } else {
                        blog.setContent(highlightedContent);
                    }
                }
                blogList.add(blog);
            }
        }
        return blogList;
    }

    /** Opens the on-disk index directory; the caller is responsible for closing it. */
    private static Directory openDirectory() throws Exception {
        return FSDirectory.open(Paths.get(INDEX_PATH));
    }

    /**
     * Builds the Lucene document for a blog post.
     *
     * <p>{@code StringField} values are indexed as a single un-tokenized term;
     * {@code TextField} values are tokenized by the analyzer. All fields are stored.
     */
    private static Document toDocument(Blog blog) {
        Document doc = new Document();
        doc.add(new StringField("id", String.valueOf(blog.getId()), Store.YES));
        doc.add(new TextField("title", blog.getTitle(), Store.YES));
        // NOTE(review): this stores the indexing date, not the post's own release
        // date, so updateIndex resets it to "today" - confirm this is intended.
        doc.add(new StringField("releaseDate",
                DateUtil.formatDate(new Date(), "yyyy-MM-dd"), Store.YES));
        doc.add(new TextField("content", blog.getContentNoTag(), Store.YES));
        return doc;
    }

    /** Returns the best highlighted fragment of {@code text} as reported by the highlighter. */
    private static String highlight(Highlighter highlighter, SmartChineseAnalyzer analyzer,
            String field, String text) throws Exception {
        TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
        return highlighter.getBestFragment(tokenStream, text);
    }
}
相关文章推荐
- [Lucene.Net] 多线程操作建议和[Lucene.Net] 分页显示
- 关于Lucene的详细说明和操作使用方式
- 艾伟_转载:Lucene.net操作索引库
- 5、学习lucene之索引操作所用类简单总结
- lucene加权操作和luke的简单演示
- Lucene学习笔记(3)-索引操作
- 艾伟_转载:Lucene.Net操作上的一些技巧
- Lucene对index操作
- lucene3.6.0索引操作的学习笔记
- lucene的写索引的操作问题
- lucene入门-操作excel
- Java控制台操作Lucene C/R/U/D
- Lucene.net操作上的一些技巧
- Lucene多线程操作实现
- Java Lucene(1):索引操作
- Lucene2.0中最常用的基本操作
- 【转帖】[Lucene.Net] 多线程操作建议和[Lucene.Net] 分页显示
- Lucene3.0结果排序原理+操作+示例
- Lucene对index操作
- lucene(全文搜索)_恢复/更新索引操作