使用lucene实现简单的全文检索
2017-12-12 17:42
483 查看
本文是一个使用lucene为文本创建并管理索引,根据索引检索文本的简单全文搜索例子。
(完)
Maven依赖
<!-- Maven dependencies used by the examples: Lucene 6.0.0 (lucene-core, lucene-queryparser, lucene-analyzers-common) and commons-io 2.5. -->
<dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>6.0.0</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>6.0.0</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>6.0.0</version> </dependency> <dependency> <groupId>commons-io</groupId> <artifactId>commons-io</artifactId> <version>2.5</version> </dependency>
索引管理
创建索引
public static void createIndex(FileManage file){ //索引保存路径 String indexRootPath = getIndexRoot(); File f = new File(indexRootPath); if(!f.exists()){ f.mkdirs(); } String indexPath = indexRootPath+File.separator+File.separator; Directory indexDir = null; IndexWriter indexWriter = null; try{ indexDir = FSDirectory.open(new File(indexPath).toPath()); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); indexWriter = new IndexWriter(indexDir, config); File sourceFile = new File(sourceRootPath); String content = org.apache.commons.io.FileUtils.readFileToString(sourceFile, "utf-8"); Document doc = new Document(); doc.add(new TextField("fileName", file.getPath(), Field.Store.YES)); doc.add(new TextField("content", content, Field.Store.YES)); indexWriter.addDocument(doc); indexWriter.flush(); }catch(Exception e){ log.error("create index error|",e); }finally{ if(null != indexWriter){ try { indexWriter.close(); } catch (IOException e) { log.error("",e); } } } }
删除索引
/**
 * Removes from the Lucene index every document whose {@code fileName} term equals
 * the given file's name.
 *
 * <p>Any failure is logged and not rethrown.
 *
 * @param file descriptor of the file whose index entry should be removed;
 *             {@code getFileName()} supplies the term value
 */
public static void deleteIndex(FileManage file) {
    String indexRootPath = getIndexRoot();
    File indexRoot = new File(indexRootPath);
    if (!indexRoot.exists()) {
        indexRoot.mkdirs();
    }
    String indexPath = indexRootPath + File.separator;

    // try-with-resources closes the writer first, then the directory, then the analyzer.
    try (Analyzer analyzer = new StandardAnalyzer();
         Directory indexDir = FSDirectory.open(new File(indexPath).toPath());
         IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(analyzer))) {
        // NOTE(review): the Term is matched verbatim against indexed terms. If fileName is
        // indexed as an analyzed TextField, this presumably only matches names the analyzer
        // leaves unchanged as a single token - confirm against how documents are indexed.
        indexWriter.deleteDocuments(new Term("fileName", file.getFileName()));
        indexWriter.flush();
    } catch (Exception e) {
        log.error("delete index error|", e);
    }
}
更新索引
/**
 * Re-reads the given file from disk and replaces its document in the Lucene index.
 *
 * <p>Documents whose {@code fileName} term equals the file's name are deleted and a
 * new document with stored, analyzed {@code fileName} and {@code content} fields
 * (file body decoded as UTF-8) is added in their place. Any failure is logged and
 * not rethrown.
 *
 * @param file descriptor of the file to re-index; {@code getPath()} locates the text
 *             on disk and {@code getFileName()} identifies the index entry
 */
public static void updateIndex(FileManage file) {
    String indexRootPath = getIndexRoot();
    File indexRoot = new File(indexRootPath);
    if (!indexRoot.exists()) {
        indexRoot.mkdirs();
    }
    String indexPath = indexRootPath + File.separator;

    // try-with-resources closes the writer first, then the directory, then the analyzer.
    try (Analyzer analyzer = new StandardAnalyzer();
         Directory indexDir = FSDirectory.open(new File(indexPath).toPath());
         IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(analyzer))) {
        File sourceFile = new File(file.getPath());
        String content = org.apache.commons.io.FileUtils.readFileToString(sourceFile, "utf-8");

        Document doc = new Document();
        doc.add(new TextField("fileName", file.getFileName(), Field.Store.YES));
        doc.add(new TextField("content", content, Field.Store.YES));

        // NOTE(review): the Term is matched verbatim while the fileName field is analyzed;
        // presumably the old document is only replaced when the analyzer leaves the name
        // unchanged as a single token - confirm.
        indexWriter.updateDocument(new Term("fileName", file.getFileName()), doc);
        indexWriter.flush();
    } catch (Exception e) {
        log.error("update index error|", e);
    }
}
根据索引检索
/**
 * Searches the Lucene index for the given text, once per supplied file.
 *
 * <p>For each file a two-clause query is built in which both clauses are required:
 * the {@code fileName} field must match the file's {@code getUuidName()} and the
 * {@code content} field must match {@code param}. Up to 1000 hits per file are
 * collected. Any failure is logged and the hits gathered so far are returned.
 *
 * @param sourcefiles files to restrict the search to; an empty list yields an empty result
 * @param param       query text for the {@code content} field, parsed with Lucene query syntax
 * @return one {@code SearchResult} (score and stored file name) per hit, never {@code null}
 */
public static List<SearchResult> search(List<FileManage> sourcefiles, String param) {
    if (XaUtil.isEmpty(sourcefiles)) {
        return Collections.emptyList();
    }

    List<SearchResult> result = new ArrayList<SearchResult>();

    String indexRootPath = getIndexRoot();
    File indexRoot = new File(indexRootPath);
    if (!indexRoot.exists()) {
        indexRoot.mkdirs();
    }
    String indexPath = indexRootPath + File.separator;

    String[] fieldStr = new String[]{"fileName", "content"};
    BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST};

    // Open the reader once for the whole search instead of once per file, and let
    // try-with-resources close reader, directory and analyzer in that order.
    try (Analyzer analyzer = new StandardAnalyzer();
         Directory indexDir = FSDirectory.open(new File(indexPath).toPath());
         DirectoryReader ireader = DirectoryReader.open(indexDir)) {
        IndexSearcher isearcher = new IndexSearcher(ireader);

        for (FileManage file : sourcefiles) {
            String[] queryAry = new String[]{file.getUuidName(), param};
            Query query = MultiFieldQueryParser.parse(queryAry, fieldStr, flags, analyzer);
            ScoreDoc[] hits = isearcher.search(query, 1000).scoreDocs;

            for (ScoreDoc hit : hits) {
                Document doc = isearcher.doc(hit.doc);
                SearchResult sr = new SearchResult();
                sr.setScore(hit.score);
                sr.setFileName(doc.get("fileName"));
                result.add(sr);
            }
        }
    } catch (Exception e) {
        log.error("", e);
    }

    // Returned after the try block (not from a finally) so that errors which are not
    // caught above are not silently discarded.
    return result;
}
(完)
相关文章推荐
- 使用compass+lucene实现简单的全文检索功能
- 使用Lucene-Spatial实现集成地理位置的全文检索
- 火力全开——仿造Baidu简单实现基于Lucene.net的全文检索的功能
- 使用Lucene.Net实现全文检索
- 简单内容管理实现:使用JCR对二进制文件的读写和全文检索
- lucene学习总结篇--lucene全文检索的基本原理和lucene API简单的使用
- 使用Lucene-Spatial实现集成地理位置的全文检索
- 站内搜索------仿造Baidu简单实现基于Lucene.net的全文检索的功能
- 使用Lucene.Net实现全文检索
- 使用Lucene-Spatial实现集成地理位置的全文检索
- 使用lucene实现全文检索
- 仿造Baidu简单实现基于Lucene.net的全文检索的功能
- 使用Lucene对doc、docx、pdf、txt文档进行全文检索功能的实现
- 使用Lucene-Spatial实现集成地理位置的全文检索
- 仿造Baidu简单实现基于Lucene.net的全文检索的功能
- 使用Lucene-Spatial实现集成地理位置的全文检索
- 使用Lucene.Net实现全文检索
- 使用Lucene.Net实现全文检索
- 火力全开——仿造Baidu简单实现基于Lucene.net的全文检索的功能
- 使用Lucene.Net实现全文检索