您的位置:首页 > 其它

使用Lucene的highlight包高亮显示检索关键字

2011-11-08 15:46 453 查看
Lucene的org.apache.lucene.search.highlight包提供了用于高亮显示检索关键字的工具。

使用百度、Google搜索时,检索结果的摘要中与关键字相同的词条会被高亮显示,百度和Google都采用红色进行高亮。

需要的包有lucene-core-3.4.0.jar,IKAnalyzer3.2.8.jar,lucene-highlighter-3.4.0.jar

例子如下:

package com;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Small demo of the Lucene highlighter: indexes a handful of book titles into
 * an in-memory directory, searches them for a keyword, prints the plain hits
 * and then prints the same hits with the matched keyword wrapped in
 * {@code <font color='red'>...</font>}.
 */
public class HightLightTest {

    /** In-memory index shared by all static helpers of this class. */
    static Directory dir = new RAMDirectory();

    /** Analyzer used for indexing, for query parsing and for highlighting. */
    static Analyzer analyzer = new IKAnalyzer();

    /** Sample book titles that {@link #index()} stores in the "bookName" field. */
    static String[] bookNames = {"java开发手册", "深入java开发", "java基础", "程序设计java开发", "java案例精讲", "hadoop项目实例汇总"};

    /**
     * Builds the index, searches the "bookName" field for "java" and prints
     * the hit count, the plain hits and the highlighted hits.
     *
     * @param args ignored
     * @throws Exception if indexing, searching or highlighting fails
     */
    public static void main(String[] args) throws Exception {
        index();
        TopDocs topDocs = searcher("bookName", "java");
        System.out.println("共有记录" + topDocs.totalHits + "条");
        System.out.println("-----------------------------------------------");
        display(topDocs);
        System.out.println("-----------------------------------------------");
        highLightDisplay(topDocs, "java");
    }

    /**
     * Prints the "bookName" of every hit with the query keywords highlighted
     * in red HTML markup.
     *
     * @param topDocs  hits previously returned by {@link #searcher(String, String)}
     * @param keyWords query string, parsed against the "bookName" field
     * @throws CorruptIndexException        if the index is corrupt
     * @throws IOException                  if reading the index fails
     * @throws ParseException               if {@code keyWords} cannot be parsed
     * @throws InvalidTokenOffsetsException if the highlighter meets bad token offsets
     */
    public static void highLightDisplay(TopDocs topDocs, String keyWords)
            throws CorruptIndexException, IOException, ParseException, InvalidTokenOffsetsException {

        IndexSearcher searcher = new IndexSearcher(dir);
        try {
            QueryParser queryParser = new QueryParser(Version.LUCENE_34, "bookName", analyzer);
            Query query = queryParser.parse(keyWords);

            SimpleHTMLFormatter simpleHTMLFormatter =
                    new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
            // Fragment size 1024 -- presumably large enough that a whole title
            // stays in a single fragment.
            highlighter.setTextFragmenter(new SimpleFragmenter(1024));

            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                String text = doc.get("bookName");

                TokenStream tokenStream = analyzer.tokenStream("bookName", new StringReader(text));
                String highLightText = highlighter.getBestFragment(tokenStream, text);

                // getBestFragment yields null when no query term occurs in the
                // text; print the plain text instead of the literal "null".
                System.out.println(highLightText != null ? highLightText : text);
            }
        } finally {
            // Release the searcher even when parsing or highlighting throws.
            searcher.close();
        }
    }

    /**
     * Prints the stored "bookName" of every hit, one per line.
     *
     * @param topDocs hits previously returned by {@link #searcher(String, String)}
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if reading the index fails
     */
    public static void display(TopDocs topDocs) throws CorruptIndexException, IOException {

        IndexSearcher searcher = new IndexSearcher(dir);
        try {
            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("bookName"));
            }
        } finally {
            searcher.close();
        }
    }

    /**
     * Searches the index for books matching the given keywords.
     *
     * @param fieldName default field the query is parsed against
     * @param keyWords  query string
     * @return the hits for the parsed query (requested with no upper bound)
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if reading the index fails
     * @throws ParseException        if {@code keyWords} cannot be parsed
     */
    public static TopDocs searcher(String fieldName, String keyWords)
            throws CorruptIndexException, IOException, ParseException {

        IndexSearcher searcher = new IndexSearcher(dir);
        try {
            QueryParser queryParser = new QueryParser(Version.LUCENE_34, fieldName, analyzer);
            Query query = queryParser.parse(keyWords);
            return searcher.search(query, Integer.MAX_VALUE);
        } finally {
            // Runs after the return value is computed, so the result is unaffected.
            searcher.close();
        }
    }

    /**
     * Indexes every entry of {@link #bookNames} as a stored, analyzed
     * "bookName" field, then optimizes and closes the writer.
     *
     * @throws CorruptIndexException     if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException               if writing the index fails
     */
    public static void index() throws CorruptIndexException, LockObtainFailedException, IOException {

        IndexWriter index = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_34, analyzer));
        try {
            for (String bookName : bookNames) {
                Document doc = new Document();
                doc.add(new Field("bookName", bookName, Field.Store.YES, Field.Index.ANALYZED));
                index.addDocument(doc);
            }
            index.optimize();
        } finally {
            // Always close so the writer's lock is released even on failure.
            index.close();
        }
    }

}


下面是运行结果:

共有记录5条

-----------------------------------------------

java基础

java开发手册

深入java开发

java案例精讲

程序设计java开发

-----------------------------------------------

<font color='red'>java</font>基础

<font color='red'>java</font>开发手册

深入<font color='red'>java</font>开发

<font color='red'>java</font>案例精讲

程序设计<font color='red'>java</font>开发
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: