您的位置:首页 > 运维架构 > 网站架构

利用lucene给网站、系统增加搜索功能

2013-07-08 21:07 281 查看
有些网站的搜索功能都是直接使用like %关键词%方式对数据库进行关键词查找,不过这种方式速度比较慢,而且影响数据库服务器性能。

其实我们可以先把数据从数据库查询出来,利用lucene建立索引。以后每次查找都从索引中查找,可以提高查询速度和减轻服务器负担。

本篇用到的技术:lucene3.0.2,IKAnalyzer3.2.5

search.properties主要是配置搜索的信息,内容:


# 指定查找sql,需要建立索引的数据(注意:properties文件不支持行尾注释,注释必须单独成行并以#开头)
sql=select iId,title,content,credate from archeive


update.field=iId


update.value=


# 搜索时的查找字段(注释需单独成行,否则会被当作属性值的一部分)
search.condition=title,content


# 索引的保存地址(注释需单独成行,否则会被当作路径的一部分)
index.path=D:/project/Java/lucene/WebContent/WEB-INF/classes/Index


# 更新索引的时间间隔,单位为毫秒(注释需单独成行,否则数值无法解析)
period=10000

com.search.util.SearchConfig主要是读取search.properties的信息。


/**
 * Accessor for the search settings stored in {@code search.properties}.
 *
 * <p>The properties are loaded once, when the instance is constructed, through
 * {@code PropertiesUtil.getProperties}. Every getter falls back to a default
 * (empty string, or {@code "0"} for the period) when its key is absent.
 */
public class SearchConfig {

    /** Name of the properties file handed to {@code PropertiesUtil}. */
    private static final String SEARCH_FILE = "search.properties";

    /** Key of the SQL statement that selects the rows to index. */
    private static final String SQL = "sql";

    /** Key of the comma-separated list of fields to search in. */
    private static final String CONDITION = "search.condition";

    /** Key of the directory in which the index is stored. */
    private static final String INDEX = "index.path";

    /** Key of the interval between two index updates. */
    private static final String PERIOD = "period";

    /** Key of the column used to restrict the indexing query. */
    private static final String UPDATE_FIELD = "update.field";

    /** Key of the threshold value compared against the update column. */
    private static final String UPDATE_VALUE = "update.value";

    private Properties searchPro;

    /** Creates the configuration and immediately loads the properties. */
    public SearchConfig() {
        initSearch();
    }

    /** (Re)loads the properties from {@code search.properties}. */
    public void initSearch() {
        searchPro = PropertiesUtil.getProperties(SEARCH_FILE);
    }

    /**
     * @return the configured SQL statement, or an empty string when unset
     */
    public String getSql() {
        return searchPro.getProperty(SQL, "");
    }

    /**
     * @return the comma-separated search fields, or an empty string when unset
     */
    public String getCondition() {
        return searchPro.getProperty(CONDITION, "");
    }

    /**
     * Returns the index directory, creating it when it does not exist yet.
     *
     * @return the directory named by {@code index.path}
     */
    public File getIndexPath() {
        String path = searchPro.getProperty(INDEX, "");
        File dir = new File(path);
        if (!dir.exists()) {
            // mkdirs() rather than mkdir(): the configured path is usually nested
            // several levels deep and mkdir() fails if any parent is missing.
            dir.mkdirs();
        }
        return dir;
    }

    /**
     * Returns the interval between two index updates.
     *
     * @return the configured period, or {@code 0} when the key is absent
     * @throws NumberFormatException if the configured value is not a whole number
     */
    public long getPeriod() {
        String period = searchPro.getProperty(PERIOD, "0");
        // Parse as long (the declared return type) and ignore surrounding blanks,
        // which Properties keeps at the end of a value.
        return Long.parseLong(period.trim());
    }

    /**
     * @return the name of the update column, or an empty string when unset
     */
    public String getUpdateField() {
        return searchPro.getProperty(UPDATE_FIELD, "");
    }

    /**
     * @return the threshold for the update column, or an empty string when unset
     */
    public String getUpdateValue() {
        return searchPro.getProperty(UPDATE_VALUE, "");
    }

    /** Writes the current properties back through {@code PropertiesUtil.saveProperties}. */
    public void save() {
        PropertiesUtil.saveProperties(searchPro, SEARCH_FILE);
    }
}

com.search.util.LuceneUtil代码介绍,主要是生成索引和搜索。


/**
 * Builds the Lucene index from the database and runs keyword searches against it.
 *
 * <p>The index directory, the source SQL, the searchable fields and the optional
 * update column/value are read from {@link SearchConfig} when an instance is
 * constructed.
 */
public class LuceneUtil {

    /** Upper bound on the number of documents returned by {@link #search(String)}. */
    private static final int MAX_HITS = 10;

    private File indexpath = null;

    private String sql = null;

    private String condition = null;

    private String updateField = null;

    private String updateValue = null;

    private SearchConfig sc = null;

    /**
     * Loads the settings. When {@code update.value} is non-empty the SQL is
     * narrowed by appending {@code where <update.field> > <update.value>}.
     */
    public LuceneUtil() {
        sc = new SearchConfig();
        indexpath = sc.getIndexPath();
        sql = sc.getSql();
        condition = sc.getCondition();
        updateField = sc.getUpdateField();
        updateValue = sc.getUpdateValue();
        if (!updateValue.equals("")) {
            // NOTE(review): assumes the configured SQL has no WHERE clause of its own.
            sql = sql + " where " + updateField + " > " + updateValue;
        }
    }

    /**
     * Rebuilds the index from the rows returned by the configured SQL.
     *
     * <p>Column 1 is stored as {@code id}, column 2 as {@code title} and column 3
     * as {@code content}. The writer is opened with {@code create = true}, so the
     * previous index content is replaced. Failures are printed, not rethrown.
     */
    public void createIndex() {
        System.out.println("==========正在生成数据库索引。");

        // Fetch the rows that have to be indexed.
        ResultSet rs = SQLHelper.getResultSet(sql);
        if (rs == null) {
            // Without rows there is nothing to write; bail out before the writer
            // (opened in "create" mode) replaces the existing index.
            return;
        }

        FSDirectory directory = null;
        IndexWriter writer = null;
        try {
            // Open the index directory once and hand the same instance to the
            // writer, so that exactly one directory has to be closed afterwards.
            directory = FSDirectory.open(indexpath);
            Analyzer analyzer = new IKAnalyzer();
            writer = new IndexWriter(directory, analyzer, true,
                    IndexWriter.MaxFieldLength.LIMITED);

            while (rs.next()) {
                Document doc = new Document();
                doc.add(new Field("id", String.valueOf(rs.getInt(1)),
                        Field.Store.YES, Field.Index.ANALYZED));
                // Field rejects null values; a NULL column must not abort the build.
                doc.add(new Field("title", nullToEmpty(rs.getString(2)),
                        Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("content", nullToEmpty(rs.getString(3)),
                        Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release everything even after a failure; an unclosed writer keeps
            // the index write lock and blocks the next rebuild.
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            try {
                rs.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Searches the configured fields for {@code keyword}.
     *
     * @param keyword the text to look for; {@code null} or empty yields no results
     * @return at most {@value #MAX_HITS} best-scoring documents, never {@code null};
     *         empty when nothing matches or when the search fails
     */
    public List<Document> search(String keyword) {
        List<Document> list = new ArrayList<Document>();

        // Check the keyword first so that no index resources are opened (and
        // left open) for a request that cannot produce results.
        if (keyword == null || keyword.equals("")) {
            return list;
        }

        FSDirectory directory = null;
        IndexReader reader = null;
        IndexSearcher isearcher = null;
        try {
            directory = FSDirectory.open(indexpath);
            reader = IndexReader.open(directory, true);
            isearcher = new IndexSearcher(reader);
            isearcher.setSimilarity(new IKSimilarity());

            Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);

            // Fetch the best-scoring records only.
            TopDocs topDocs = isearcher.search(query, MAX_HITS);

            // totalHits counts every match in the index, while scoreDocs holds only
            // the hits that were actually fetched, so iterate over the array itself.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc hit : scoreDocs) {
                list.add(isearcher.doc(hit.doc));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (isearcher != null) {
                try {
                    isearcher.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            // The reader was opened here, so it is closed here as well.
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return list;
    }

    /** Maps {@code null} to the empty string and returns every other value unchanged. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}

com.search.listener.Indexlistener启动索引更新程序


/**
 * Context listener that starts the index update job together with the web
 * application.
 */
public class Indexlistener implements ServletContextListener {

    /**
     * Called when the web application shuts down; nothing is done here.
     *
     * @param event the context event supplied by the container (unused)
     */
    public void contextDestroyed(javax.servlet.ServletContextEvent event) {
        // Intentionally empty.
    }

    /**
     * Called when the web application starts; creates an {@code IndexTask}.
     * The instance is not kept.
     *
     * @param event the context event supplied by the container (unused)
     */
    public void contextInitialized(javax.servlet.ServletContextEvent event) {
        new IndexTask();
    }
}

com.search.listener.IndexTask定时更新索引


/**
 * Schedules the index build on a {@link Timer}.
 *
 * <p>The first build starts immediately. It is repeated every
 * {@code SearchConfig.getPeriod()} milliseconds when that period is positive;
 * otherwise the index is built exactly once.
 */
public class IndexTask {

    /** Timer that runs the build; kept so that {@link #cancel()} can stop it. */
    private final Timer timer;

    /**
     * Reads the update period from the configuration and schedules the build.
     */
    public IndexTask() {
        // Read the configuration before creating the timer: a Timer starts its
        // thread in the constructor, and a failure while loading the settings
        // would otherwise leave that thread running with nothing to do.
        SearchConfig sc = new SearchConfig();
        long period = sc.getPeriod();

        timer = new Timer();
        if (period > 0) {
            timer.schedule(new Task(), new Date(), period);
        } else {
            // Timer.schedule rejects a non-positive period with an
            // IllegalArgumentException, and 0 is the default when the key is not
            // configured, so fall back to a single run.
            timer.schedule(new Task(), new Date());
        }
    }

    /**
     * Stops the timer and discards any scheduled builds. Call this when the
     * application shuts down so the timer thread does not outlive it.
     */
    public void cancel() {
        timer.cancel();
    }

    /** Timer task that rebuilds the index with a fresh {@code LuceneUtil}. */
    static class Task extends TimerTask {

        public void run() {
            try {
                LuceneUtil lu = new LuceneUtil();
                lu.createIndex();
            } catch (RuntimeException e) {
                // An exception escaping run() terminates the timer thread and
                // with it all future builds; report it and wait for the next run.
                e.printStackTrace();
            }
        }
    }
}

com.search.util.RedHighlighter关键词高亮显示


/**
 * Marks the search keyword in a piece of text by wrapping every match in
 * {@code <font color='red'>...</font>}.
 */
public class RedHighlighter {

    /** Fragment size passed to the fragmenter; also the length of the fallback excerpt. */
    private static final int FRAGMENT_SIZE = 100;

    /**
     * Returns the best fragment of {@code word} with the keyword highlighted.
     *
     * <p>The returned text is not HTML-escaped; only the highlight tags are added.
     *
     * @param keyword the search text entered by the user
     * @param field   the name of the field {@code word} belongs to
     * @param word    the stored field text to highlight; may be {@code null}
     * @return the highlighted fragment; the leading part of {@code word} when it
     *         contains no match; an empty string when {@code word} is
     *         {@code null} or empty, or when highlighting fails
     */
    public static String getBestFragment(String keyword, String field, String word) {
        if (word == null || word.equals("")) {
            // Nothing to highlight; avoids a NullPointerException inside the analyzer.
            return "";
        }

        SearchConfig sc = new SearchConfig();
        String condition = sc.getCondition();
        try {
            Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                    "<font color='red'>", "</font>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                    new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));

            String fragment = highlighter.getBestFragment(new IKAnalyzer(), field, word);
            if (fragment == null) {
                // The highlighter returns null when the text contains none of the
                // query terms (for example the keyword matched only the other
                // field). Show a plain excerpt instead of the string "null".
                return word.length() > FRAGMENT_SIZE
                        ? word.substring(0, FRAGMENT_SIZE)
                        : word;
            }
            return fragment;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "";
    }
}

index.jsp搜索页面


<%@ page language="java" contentType="text/html; charset=GBK"
    pageEncoding="GBK"%>
<%@page import="com.search.util.LuceneUtil" %>
<%@page import="java.util.*" %>
<%@page import="org.apache.lucene.document.Document" %>
<%@page import="com.search.util.RedHighlighter" %>
<%@page import="java.net.URLEncoder"%>
<%!
    /** Length of the plain excerpt shown when a field has no highlighted fragment. */
    private static final int EXCERPT_LENGTH = 100;

    /**
     * Escapes the characters that are significant in HTML text and in quoted
     * attribute values. Returns an empty string for null.
     */
    private static String escapeHtml(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                case '\'':
                    out.append("&#39;");
                    break;
                default:
                    out.append(c);
            }
        }
        return out.toString();
    }

    /** Returns the escaped leading part of a stored field value; empty for null. */
    private static String plainExcerpt(String s) {
        if (s == null) {
            return "";
        }
        return escapeHtml(s.length() > EXCERPT_LENGTH ? s.substring(0, EXCERPT_LENGTH) : s);
    }
%>
<%-- The DOCTYPE has to come before the <html> start tag. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<title>搜索</title>
<link rel="stylesheet" href="./style/style.css" type="text/css">
</head>
<%
    //request.setCharacterEncoding("GBK");
    String w = request.getParameter("w");
    int size = 0;
    long time = 0;
    List<Document> list = null;
    if (w != null && !w.equals("")) {
        // Re-decode the GET parameter, which arrives decoded as ISO-8859-1.
        w = new String(w.getBytes("ISO8859-1"), "GBK");
    } else {
        w = "";
    }

    LuceneUtil lu = new LuceneUtil();
    Date start = new Date();
    list = lu.search(w);
    Date end = new Date();
    size = list.size();
    time = end.getTime() - start.getTime();
%>
<script type="text/javascript">
function submit(){

}
</script>
<body>
<div class="seachInput" align="center">
<form method="get" action="index.jsp"><br>
<%-- The keyword comes straight from the request, so it must be escaped before
     it is echoed into the attribute; otherwise a quote in the keyword breaks out
     of the value and allows script injection. --%>
<input type="text" class="txtSeach" id="w" name="w" value="<%=escapeHtml(w) %>"
><input type="submit"
class="btnSearch" onclick="submit" value="找一下">    <br>
</form>
</div>
<div id="searchInfo"><span style="float: left; margin-left: 15px;"></span>找到相关内容<%=size%>篇,
用时<%=time%>毫秒
</div>
<div id="main">
<div id="searchResult">
<div class="forflow">
<%
    if (list != null && list.size() > 0) {
        for (Document doc : list) {
            String rawTitle = doc.get("title");
            String rawContent = doc.get("content");
            // NOTE(review): the highlighted fragments are written as raw HTML because
            // they carry the <font> highlight tags; the indexed text itself is not
            // escaped by the highlighter, so it must be trusted content.
            String title = RedHighlighter.getBestFragment(w, "title", rawTitle);
            String content = RedHighlighter.getBestFragment(w, "content", rawContent);
            // A field without any match yields null; show a plain excerpt
            // instead of printing the string "null".
            if (title == null) {
                title = plainExcerpt(rawTitle);
            }
            if (content == null) {
                content = plainExcerpt(rawContent);
            }
%>
<div class="searchItem">
<a href="#" class="searchItemTitle" target="_blank"><%=title %></a>
<div class="searchCon">
<%=content %>
</div>
</div>
<%
        }
    }
%>
</div>
</div>
</div>
</body>
</html>
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐