您的位置:首页 > 其它

Lucene索引创建、查询与高亮

2015-06-23 16:50 274 查看
前面我们介绍了Lucene的基本使用及中文分词,下面以一个实例来看一下Lucene的使用方式。

1.创建实例对象

实例数据可以来自文本文件、网页或数据库等,读取后创建索引文件。这里以People对象作为获取到的数据实体:

package cn.slimsmart.lucene.demo.example1;

import java.util.ArrayList;
import java.util.List;

/**
 * Data entity used as the raw material for the index: an identifier, a
 * display name and a free-text description.
 */
public class People {

    private String id;
    private String name;
    private String desc;

    /** @return the identifier of this person */
    public String getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the name of this person */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the free-text description of this person */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc the free-text description to store */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /**
     * Builds the fixed sample data set used by the example.
     *
     * @return a new mutable list holding three people, in a fixed order
     */
    public static List<People> getInitList() {
        // One row per person: { id, name, desc }.
        String[][] rows = {
            { "10001", "张三", "张三是个农民,勤劳致富,奔小康" },
            { "20001", "李四", "李四是个企业家,白手起家,致富一方" },
            { "11111", "王五", "王五好吃懒做,溜须拍马,跟着李四,也过着小康的日子" },
        };

        List<People> people = new ArrayList<People>();
        for (String[] row : rows) {
            People person = new People();
            person.setId(row[0]);
            person.setName(row[1]);
            person.setDesc(row[2]);
            people.add(person);
        }
        return people;
    }
}


2.创建索引

根据原数据生成索引文件

package cn.slimsmart.lucene.demo.example1;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Creates and maintains the on-disk Lucene index built from the sample
 * {@link People} data.
 */
public class CreateIndex {

    /** Directory that holds the index files; shared by every method of this class. */
    private static final String INDEX_DIR = "src/main/resources";

    /**
     * Indexes the sample data and commits it to the index directory.
     *
     * <p>The directory and the writer are opened in a try-with-resources
     * statement so that both are closed even when indexing fails.
     *
     * @throws Exception if the index cannot be opened, written or committed
     */
    public static void write() throws Exception {
        // An in-memory RAMDirectory could be filled first and then merged into
        // the file index with fsIndexWriter.addIndexes(new Directory[] { ramDirectory }).
        try (Directory fsDirectory = FSDirectory.open(new File(INDEX_DIR));
                IndexWriter fsIndexWriter = new IndexWriter(fsDirectory, getConfig())) {
            fsIndexWriter.addDocuments(getData());
            fsIndexWriter.commit();
        }
    }

    /**
     * Builds the writer configuration: IK analyzer in smart mode, opening the
     * index in {@code CREATE_OR_APPEND} mode.
     *
     * @return a new configuration; a fresh instance is created on every call
     */
    public static IndexWriterConfig getConfig() {
        Analyzer analyzer = new IKAnalyzer(true); // analyzer used for tokenizing
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_4, analyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        return config;
    }

    /**
     * Converts the sample people into Lucene documents.
     *
     * @return one document per person with the stored fields {@code id},
     *         {@code name} (both {@code StringField}) and {@code desc}
     *         ({@code TextField})
     */
    public static Iterable<? extends Iterable<? extends IndexableField>> getData() {
        List<Document> docs = new ArrayList<Document>();
        for (People p : People.getInitList()) {
            Document doc = new Document();
            // StoredField would only store the value without indexing it.
            // IntField / LongField are meant for fields used in sorting and filtering.
            doc.add(new StringField("id", p.getId(), Field.Store.YES));
            doc.add(new StringField("name", p.getName(), Field.Store.YES));
            doc.add(new TextField("desc", p.getDesc(), Field.Store.YES));
            docs.add(doc);
        }
        return docs;
    }

    /**
     * Deletes every document whose {@code id} field equals the given value and
     * commits the change.
     *
     * <p>An {@link IOException} is not propagated: its stack trace is printed
     * and the method returns normally. The directory and the writer are closed
     * in either case.
     *
     * @param id value of the {@code id} field of the documents to delete
     */
    public void deleteDoc(String id) {
        try (Directory dir = FSDirectory.open(new File(INDEX_DIR));
                IndexWriter writer = new IndexWriter(dir, getConfig())) {
            writer.deleteDocuments(new Term("id", id));
            // To replace a document instead of deleting it: writer.updateDocument(term, doc);
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


3.搜索

通过关键词,同时搜索多个field。

package cn.slimsmart.lucene.demo.example1;

import java.io.File;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Keyword search over the example index with HTML highlighting of the hits.
 */
public class SearchKeyword {

    /**
     * Parses {@code queryString} against the {@code id}, {@code name} and
     * {@code desc} fields, runs the query against the index directory and
     * prints, for every hit, the stored {@code desc} value followed by two
     * highlighted versions of it (one produced from an explicit token stream,
     * one produced by passing the analyzer directly).
     *
     * <p>The directory and the reader are opened in a try-with-resources
     * statement so that both are closed even when searching or highlighting
     * fails.
     *
     * @param queryString the query text, in classic query parser syntax
     * @throws Exception if the query cannot be parsed, the index cannot be
     *         read, or highlighting fails
     */
    public static void search(String queryString) throws Exception {
        String[] fields = { "id", "name", "desc" };
        String indexDir = "src/main/resources"; // index directory
        Analyzer analyzer = new IKAnalyzer(true); // analyzer used for tokenizing

        QueryParser queryParse = new MultiFieldQueryParser(fields, analyzer);
        queryParse.setPhraseSlop(3);
        Query query = queryParse.parse(queryString);

        Highlighter highlighter = newHighlighter(query);

        try (Directory directory = FSDirectory.open(new File(indexDir));
                DirectoryReader directoryReader = DirectoryReader.open(directory)) {
            IndexSearcher isearcher = new IndexSearcher(directoryReader);
            /*
             * Sorting:
             *   Sort sort = new Sort(new SortField("birthdays", Type.STRING, false));
             *   TopDocs topDocs = isearcher.search(query, filter, topnum, sort);
             *
             * Paging:
             *   TopFieldCollector c = TopFieldCollector.create(sort, first + end, false, false, false, false);
             *   isearcher.search(query, c);
             *   ScoreDoc[] hits = c.topDocs(first, end).scoreDocs;
             */
            TopDocs topDocs = isearcher.search(query, null, 1000);

            System.out.println("总共有[" + topDocs.totalHits + "]条匹配结果");
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                System.out.println("-------------------------");
                Document doc = isearcher.doc(scoreDoc.doc);
                String text = doc.get("desc"); // read the stored value once per hit
                System.out.println(text);

                // Variant 1: highlight from an explicitly created token stream.
                TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(text));
                String fromTokenStream = highlighter.getBestFragment(tokenStream, text);
                System.out.println(fromTokenStream);

                // Variant 2: let the highlighter create the token stream from the analyzer.
                String fromAnalyzer = highlighter.getBestFragment(analyzer, "desc", text);
                System.out.println(fromAnalyzer);
                System.out.println("-------------------------");
            }
        }
    }

    /** Creates a highlighter that wraps terms matching {@code query} in a red font tag. */
    private static Highlighter newHighlighter(Query query) {
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        // A single-term query could be used instead:
        //   query = new TermQuery(new Term(USERNAME, content));
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        Fragmenter fragmenter = new SimpleFragmenter(100); // fragment size in characters
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

}
运行:

// Build the on-disk index from the sample People data.
System.out.println("创建索引开始");
CreateIndex.write();
// Search the id, name and desc fields for the keyword and print the highlighted hits.
System.out.println("搜索--Keyword");
SearchKeyword.search("小康");
可以看到结果:

创建索引开始
加载扩展词典:ext.dic
加载扩展停止词典:stopword.dic
加载扩展停止词典:CH_stopword.dic
搜索--Keyword
总共有[1]条匹配结果
-------------------------
王五好吃懒做,溜须拍马,跟着李四,也过着小康的日子
王五好吃懒做,溜须拍马,跟着李四,也过着<font color='red'>小康</font>的日子
王五好吃懒做,溜须拍马,跟着李四,也过着<font color='red'>小康</font>的日子
-------------------------
参考文章:

1.lucene教程

2.Lucene分页查询

3.Lucene3.5.0以上(包含3.5.0)版本自定义日期排序

4.Lucene搜索方式大合集

5.lucene 查询+分页+排序

6.Lucene 3.6 中文分词、分页查询、高亮显示等
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  全文检索 lucene