您的位置:首页 > 其它

Lucene查询并高亮显示

2017-07-09 10:00 405 查看

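1.导入jar包(根据下文代码的 import,需要 Lucene 4.9 及以上的 core、analyzers-common、queryparser、highlighter,以及 jericho-html、IKAnalyzer 和 junit)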





2.创建实体Bean

package com.zhishang.lucene;

/**
 * Simple mutable value holder with three string properties: title, content and url.
 * All properties start out as {@code null} until their setter is called.
 *
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBean {

    private String title;
    private String content;
    private String url;

    /** @return the current title, or {@code null} if none has been set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the current content, or {@code null} if none has been set */
    public String getContent() {
        return this.content;
    }

    /** @param content the content text to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the current url, or {@code null} if none has been set */
    public String getUrl() {
        return this.url;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }
}


3.创建工具Bean

package com.zhishang.lucene;

import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

/**
 * Helper for turning an HTML file into an {@link HtmlBean}.
 *
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBeanUtil {

    /**
     * Parses {@code file} with Jericho and builds a bean from it: the title comes from the
     * first {@code <title>} element, the content from the text extracted from the whole
     * document, and the url from the file's absolute path.
     *
     * @param file the HTML file to parse
     * @return the populated bean, or {@code null} when no title element is found or when
     *         reading the file raises an {@link IOException} (the stack trace is printed)
     */
    public static HtmlBean parseHtml(File file) {
        try {
            Source source = new Source(file);
            Element titleElement = source.getFirstElement(HTMLElementName.TITLE);

            if (titleElement != null && titleElement.getTextExtractor() != null) {
                HtmlBean bean = new HtmlBean();
                bean.setTitle(titleElement.getTextExtractor().toString());
                bean.setContent(source.getTextExtractor().toString());
                bean.setUrl(file.getAbsolutePath());
                return bean;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Either the document had no usable title or the file could not be read.
        return null;
    }
}


4.创建操作Bean

package com.zhishang.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Runs keyword searches against the Lucene index stored at {@code CreateIndex.indexDir}
 * and returns the hits with the matched terms wrapped in red {@code <font>} tags.
 *
 * Created by Administrator on 2017/7/7.
 */
public class SearchIndex {

    /**
     * Searches the "title" and "content" fields for {@code keyword} and returns at most
     * ten hits.
     *
     * <p>For each hit the title is the best highlighted fragment of the stored title, the
     * content is up to three highlighted fragments of the stored content joined by "...",
     * and the url is the stored "url" field unchanged.
     *
     * @param keyword the query text, parsed with the classic query parser syntax
     * @return the list of hits (possibly empty), or {@code null} when opening the index,
     *         parsing the query or highlighting fails (the stack trace is printed)
     */
    public List<HtmlBean> search(String keyword) {
        // Directory and IndexReader hold file handles; try-with-resources guarantees they
        // are released on every path (Lucene 4.8+ already requires Java 7).
        try (Directory dir = FSDirectory.open(new File(CreateIndex.indexDir));
             IndexReader reader = DirectoryReader.open(dir)) {

            IndexSearcher searcher = new IndexSearcher(reader);
            // One analyzer serves both query parsing and highlighting; the highlighter
            // closes each token stream it consumes, so the analyzer can be reused.
            Analyzer analyzer = new IKAnalyzer();
            MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(
                    Version.LUCENE_4_9, new String[]{"title", "content"}, analyzer);
            Query query = multiFieldQueryParser.parse(keyword);

            TopDocs topDocs = searcher.search(query, 10);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;

            SimpleHTMLFormatter simpleHTMLFormatter =
                    new SimpleHTMLFormatter("<font color='red'>", "</font>");
            QueryScorer queryScorer = new QueryScorer(query, "title");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, queryScorer);

            List<HtmlBean> htmlBeanList = new ArrayList<HtmlBean>(scoreDocs.length);
            for (ScoreDoc scoreDoc : scoreDocs) {
                Document document = reader.document(scoreDoc.doc);
                String storedTitle = document.get("title");
                String storedContent = document.get("content");

                String title = highlighter.getBestFragment(analyzer, "title", storedTitle);
                if (title == null) {
                    // getBestFragment yields null when no query term occurs in the title
                    // (the hit matched on content only); show the plain title instead.
                    title = storedTitle;
                }
                String content = highlighter.getBestFragments(
                        analyzer.tokenStream("content", storedContent), storedContent, 3, "...");

                HtmlBean htmlBean = new HtmlBean();
                htmlBean.setTitle(title);
                htmlBean.setContent(content);
                htmlBean.setUrl(document.get("url"));
                htmlBeanList.add(htmlBean);
            }

            return htmlBeanList;
        } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }

        // Failure is signalled with null, as before, so existing callers keep working.
        return null;
    }
}


5.创建测试Bean

package com.zhishang.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import java.io.File;
import java.util.List;

/**
 * JUnit entry points for building the index and running a sample search.
 *
 * Created by Administrator on 2017/7/8.
 */
public class LuceneBean {

    /**
     * Searches the index for "java" and prints title, content and url of every hit,
     * followed by a separator line.
     *
     * @throws IllegalStateException if the search itself failed and returned {@code null}
     */
    @Test
    public void search() {
        SearchIndex searchIndex = new SearchIndex();
        List<HtmlBean> htmlBeanList = searchIndex.search("java");
        if (htmlBeanList == null) {
            // SearchIndex.search returns null on I/O, parse or highlighting errors; fail
            // with a clear message instead of a NullPointerException in the loop below.
            throw new IllegalStateException("search failed; see the stack trace printed by SearchIndex");
        }
        for (HtmlBean bean : htmlBeanList) {
            System.out.println(bean.getTitle());
            System.out.println(bean.getContent());
            System.out.println(bean.getUrl());
            System.out.println("-----------------------------------------------------");
        }
    }

    /**
     * Creates the index. If the index directory already exists it is emptied first.
     */
    @Test
    public void createIndex() {
        File file = new File(CreateIndex.indexDir);
        if (file.exists()) {
            // File.delete() refuses to remove a non-empty directory, so the old index
            // files have to be removed one by one before the directory itself.
            deleteRecursively(file);
            file.mkdirs();
        }
        CreateIndex createIndex = new CreateIndex();
        createIndex.createIndex();
    }

    /** Deletes {@code target} and, if it is a directory, everything below it. */
    private static void deleteRecursively(File target) {
        File[] children = target.listFiles(); // null when target is not a directory
        if (children != null) {
            for (File child : children) {
                deleteRecursively(child);
            }
        }
        target.delete();
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  查询 Lucene 高亮显示