Lucene创建索引入门案例
2016-07-03 23:34
393 查看
原:http://blog.csdn.net/zwx19921215/article/details/32936395
最近在学习 Lucene,参考网上的资料写了一个简单的搜索 demo。
项目jar包:
//索引关键类
[java] view
plain copy
1. package com.lucene.index;
2.
3. import java.io.File;
4.
import java.io.IOException;
5. import java.io.StringReader;
6.
import java.util.ArrayList;
7. import java.util.List;
8.
9. import org.apache.lucene.analysis.Analyzer;
10.
import org.apache.lucene.analysis.TokenStream;
11. import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
12.
import org.apache.lucene.document.Document;
13. import org.apache.lucene.document.Field;
14.
import org.apache.lucene.index.CorruptIndexException;
15. import org.apache.lucene.index.IndexReader;
16.
import org.apache.lucene.index.IndexWriter;
17. import org.apache.lucene.index.IndexWriterConfig;
18.
import org.apache.lucene.queryParser.ParseException;
19. import org.apache.lucene.queryParser.QueryParser;
20.
import org.apache.lucene.search.IndexSearcher;
21. import org.apache.lucene.search.Query;
22.
import org.apache.lucene.search.TopDocs;
23. import org.apache.lucene.store.Directory;
24.
import org.apache.lucene.store.FSDirectory;
25. import org.apache.lucene.store.LockObtainFailedException;
26.
import org.apache.lucene.util.Version;
27. import org.wltea.analyzer.lucene.IKAnalyzer;
28.
29. import com.lucene.vo.User;
30.
31. /**
32.
* * lucene 检索内存索引 非常简单的例子 * * @author Administrator *
33. */
34.
public class searchIndex {
35. private String[] ids = { "1", "2", "3", "4", "5", "6" };
36.
private String[] emails = { "aa@itat.org", "bb@itat.org", "cc@cc.org", "dd@sina.org", "ee@zttc.edu", "ff@itat.org" };
37. // private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football",
38.
// "I like football and I like basketball too", "I like movie and swim" };
39. private String[] contents = { "创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中", "创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中", "分词器,分词器就是将检索的关键字分割成一组组词组, 它是lucene检索查询的一大特色之一", "这个是分词器拆分最大长度,因为各种不同类型的分词器拆分的字符颗粒细化程度不一样,所以需要设置一个最长的拆分长度",
40.
"文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询", "I like movie and swim" };
41. private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };
42.
// 创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中。
43. private Directory directory = null;
44.
//IK分词器
45. IKAnalyzer analyzer = null;
46.
public searchIndex() {
47. try {
48.
directory = FSDirectory.open(new File("H:/lucene/index"));
49. analyzer = new IKAnalyzer(true);
50.
} catch (IOException e) {
51. // TODO Auto-generated catch block
52.
e.printStackTrace();
53. }
54.
}
55.
56.
public void index() {
57. /*
58.
* 创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中。
59. */
60.
IndexWriter writer;
61. try {
62.
writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
63. //创建之前先删除
64.
writer.deleteAll();
65. // 创建Document
66.
// 文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询
67.
68.
Document doc =null;
69.
70.
for(int i=0;i<ids.length;i++){
71. doc = new Document();
72.
doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
73. doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
74.
doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
75. doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
76.
writer.addDocument(doc);
77. }
78.
writer.close();
79. } catch (CorruptIndexException e) {
80.
// TODO Auto-generated catch block
81. e.printStackTrace();
82.
} catch (LockObtainFailedException e) {
83. // TODO Auto-generated catch block
84.
e.printStackTrace();
85. } catch (IOException e) {
86.
// TODO Auto-generated catch block
87. e.printStackTrace();
88.
}
89. }
90.
91. public List<User> search(String keyword) {
92.
long startTime = System.currentTimeMillis();
93. System.out.println("*****************检索开始**********************");
94.
List<User> userList = new ArrayList<User>();
95. IndexReader reader;
96.
try {
97. reader = IndexReader.open(directory);
98.
99. // 创建IndexSearcher 检索索引的对象,里面要传递上面写入的内存目录对象directory
100.
IndexSearcher searcher = new IndexSearcher(reader);
101. // 根据搜索关键字 封装一个term组合对象,然后封装成Query查询对象
102.
103. QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
104.
Query query = queryParser.parse(keyword);
105.
106.
107. // 去索引目录中查询,返回的是TopDocs对象,里面存放的就是上面放的document文档对象
108.
TopDocs rs = searcher.search(query, null, 10);
109. long endTime = System.currentTimeMillis();
110.
System.out.println("总共花费" + (endTime - startTime) + "毫秒,检索到" + rs.totalHits + "条记录。");
111. User user = null;
112.
for (int i = 0; i < rs.scoreDocs.length; i++) {
113. // rs.scoreDocs[i].doc 是获取索引中的标志位id, 从0开始记录
114.
Document firstHit = searcher.doc(rs.scoreDocs[i].doc);
115. user = new User();
116.
user.setId(Long.parseLong(firstHit.get("id")));
117. user.setName(firstHit.get("name"));
118.
user.setSex(firstHit.get("sex"));
119. user.setDosomething(firstHit.get("dosometing"));
120.
user.setEmail(firstHit.get("email"));
121. user.setContent(firstHit.get("content"));
122.
userList.add(user);
123.
124.
// System.out.println("name:" + firstHit.get("name"));
125. // System.out.println("sex:" + firstHit.get("sex"));
126.
// System.out.println("dosomething:" + firstHit.get("dosometing"));
127. }
128.
reader.close();
129. } catch (CorruptIndexException e1) {
130.
// TODO Auto-generated catch block
131. e1.printStackTrace();
132.
} catch (IOException e1) {
133. // TODO Auto-generated catch block
134.
e1.printStackTrace();
135. } catch (ParseException e) {
136.
// TODO Auto-generated catch block
137. e.printStackTrace();
138.
}
139.
140.
System.out.println("*****************检索结束**********************");
141. return userList;
142.
}
143.
144.
}
[java] view
plain copy
1. package com.lucene;
2.
3. import java.io.IOException;
4.
import java.util.List;
5.
6.
import javax.servlet.ServletException;
7. import javax.servlet.http.HttpServlet;
8.
import javax.servlet.http.HttpServletRequest;
9. import javax.servlet.http.HttpServletResponse;
10.
11. import com.lucene.index.searchIndex;
12.
import com.lucene.vo.User;
13.
14.
/**
15. * Servlet implementation class searchServlet
16.
*/
17. public class searchServlet extends HttpServlet {
18.
private static final long serialVersionUID = 1L;
19.
20.
/**
21. * Default constructor.
22.
*/
23. public searchServlet() {
24.
// TODO Auto-generated constructor stub
25. }
26.
27. /**
28.
* @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
29. */
30.
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
31. // TODO Auto-generated method stub
32.
}
33.
34.
/**
35. * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
36.
*/
37. protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
38.
request.setCharacterEncoding("UTF-8");
39. String keyword = request.getParameter("keyword");
40.
if("".equals(keyword)){
41. keyword="0";
42.
}
43. searchIndex si = new searchIndex();
44.
si.index();
45. List<User> userList = si.search(keyword);
46.
request.setAttribute("userList", userList);
47. request.getRequestDispatcher("search.jsp").forward(request, response);
48.
}
49.
50.
}
[java] view
plain copy
1. package com.lucene.vo;
2.
/**
 * Plain value object for one search hit shown on the result page.
 * All properties are optional and default to {@code null}.
 */
public class User {
    private Long id;
    private String name;
    private String sex;
    private String dosomething;
    private String email;
    private String content;

    /** @return the record id, or null if not set */
    public Long getId() {
        return id;
    }

    /** @param id the record id */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the user name, or null if not set */
    public String getName() {
        return name;
    }

    /** @param name the user name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the sex, or null if not set */
    public String getSex() {
        return sex;
    }

    /** @param sex the sex */
    public void setSex(String sex) {
        this.sex = sex;
    }

    /** @return the hobby text, or null if not set */
    public String getDosomething() {
        return dosomething;
    }

    /** @param dosomething the hobby text */
    public void setDosomething(String dosomething) {
        this.dosomething = dosomething;
    }

    /** @return the e-mail address, or null if not set */
    public String getEmail() {
        return email;
    }

    /** @param email the e-mail address */
    public void setEmail(String email) {
        this.email = email;
    }

    /** @return the body text, or null if not set */
    public String getContent() {
        return content;
    }

    /** @param content the body text */
    public void setContent(String content) {
        this.content = content;
    }
}
[html] view
plain copy
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%-- Search page: posts the keyword to searchServlet.do and renders the "userList" request attribute when it is not empty. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<%-- Must agree with the UTF-8 charset declared in the page directive. --%>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>lucene 全文检索</title>
</head>
<body style="text-align: center;">
	<form action="searchServlet.do" method="post">
		<input type="text" name="keyword" /> <input type="submit" value="搜索" />
	</form>
	<div style="height: 10px">
	</div>
	<c:if test="${not empty userList}">
		<div>相关信息:</div>
		<table border="1" align="center">
			<tr>
				<td>ID</td>
				<td>姓名</td>
				<td>性别</td>
				<td>邮箱</td>
				<td>爱好</td>
				<td>正文</td>
			</tr>
			<c:forEach items="${ userList}" var="user">
				<tr>
					<%-- c:out escapes markup characters in the indexed values. --%>
					<td><c:out value="${user.id }" /></td>
					<td><c:out value="${user.name }" /></td>
					<td><c:out value="${user.sex }" /></td>
					<td><c:out value="${user.email }" /></td>
					<td><c:out value="${user.dosomething }" /></td>
					<td><c:out value="${user.content }" /></td>
				</tr>
			</c:forEach>
		</table>
	</c:if>
</body>
</html>
代码测试:
最近在学习 Lucene,参考网上的资料写了一个简单的搜索 demo。
项目jar包:
//索引关键类
[java] view
plain copy
1. package com.lucene.index;
2.
3. import java.io.File;
4.
import java.io.IOException;
5. import java.io.StringReader;
6.
import java.util.ArrayList;
7. import java.util.List;
8.
9. import org.apache.lucene.analysis.Analyzer;
10.
import org.apache.lucene.analysis.TokenStream;
11. import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
12.
import org.apache.lucene.document.Document;
13. import org.apache.lucene.document.Field;
14.
import org.apache.lucene.index.CorruptIndexException;
15. import org.apache.lucene.index.IndexReader;
16.
import org.apache.lucene.index.IndexWriter;
17. import org.apache.lucene.index.IndexWriterConfig;
18.
import org.apache.lucene.queryParser.ParseException;
19. import org.apache.lucene.queryParser.QueryParser;
20.
import org.apache.lucene.search.IndexSearcher;
21. import org.apache.lucene.search.Query;
22.
import org.apache.lucene.search.TopDocs;
23. import org.apache.lucene.store.Directory;
24.
import org.apache.lucene.store.FSDirectory;
25. import org.apache.lucene.store.LockObtainFailedException;
26.
import org.apache.lucene.util.Version;
27. import org.wltea.analyzer.lucene.IKAnalyzer;
28.
29. import com.lucene.vo.User;
30.
31. /**
32.
* * lucene 检索内存索引 非常简单的例子 * * @author Administrator *
33. */
34.
public class searchIndex {
35. private String[] ids = { "1", "2", "3", "4", "5", "6" };
36.
private String[] emails = { "aa@itat.org", "bb@itat.org", "cc@cc.org", "dd@sina.org", "ee@zttc.edu", "ff@itat.org" };
37. // private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football",
38.
// "I like football and I like basketball too", "I like movie and swim" };
39. private String[] contents = { "创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中", "创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中", "分词器,分词器就是将检索的关键字分割成一组组词组, 它是lucene检索查询的一大特色之一", "这个是分词器拆分最大长度,因为各种不同类型的分词器拆分的字符颗粒细化程度不一样,所以需要设置一个最长的拆分长度",
40.
"文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询", "I like movie and swim" };
41. private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };
42.
// 创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中。
43. private Directory directory = null;
44.
//IK分词器
45. IKAnalyzer analyzer = null;
46.
public searchIndex() {
47. try {
48.
directory = FSDirectory.open(new File("H:/lucene/index"));
49. analyzer = new IKAnalyzer(true);
50.
} catch (IOException e) {
51. // TODO Auto-generated catch block
52.
e.printStackTrace();
53. }
54.
}
55.
56.
public void index() {
57. /*
58.
* 创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中。
59. */
60.
IndexWriter writer;
61. try {
62.
writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
63. //创建之前先删除
64.
writer.deleteAll();
65. // 创建Document
66.
// 文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询
67.
68.
Document doc =null;
69.
70.
for(int i=0;i<ids.length;i++){
71. doc = new Document();
72.
doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
73. doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
74.
doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
75. doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
76.
writer.addDocument(doc);
77. }
78.
writer.close();
79. } catch (CorruptIndexException e) {
80.
// TODO Auto-generated catch block
81. e.printStackTrace();
82.
} catch (LockObtainFailedException e) {
83. // TODO Auto-generated catch block
84.
e.printStackTrace();
85. } catch (IOException e) {
86.
// TODO Auto-generated catch block
87. e.printStackTrace();
88.
}
89. }
90.
91. public List<User> search(String keyword) {
92.
long startTime = System.currentTimeMillis();
93. System.out.println("*****************检索开始**********************");
94.
List<User> userList = new ArrayList<User>();
95. IndexReader reader;
96.
try {
97. reader = IndexReader.open(directory);
98.
99. // 创建IndexSearcher 检索索引的对象,里面要传递上面写入的内存目录对象directory
100.
IndexSearcher searcher = new IndexSearcher(reader);
101. // 根据搜索关键字 封装一个term组合对象,然后封装成Query查询对象
102.
103. QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
104.
Query query = queryParser.parse(keyword);
105.
106.
107. // 去索引目录中查询,返回的是TopDocs对象,里面存放的就是上面放的document文档对象
108.
TopDocs rs = searcher.search(query, null, 10);
109. long endTime = System.currentTimeMillis();
110.
System.out.println("总共花费" + (endTime - startTime) + "毫秒,检索到" + rs.totalHits + "条记录。");
111. User user = null;
112.
for (int i = 0; i < rs.scoreDocs.length; i++) {
113. // rs.scoreDocs[i].doc 是获取索引中的标志位id, 从0开始记录
114.
Document firstHit = searcher.doc(rs.scoreDocs[i].doc);
115. user = new User();
116.
user.setId(Long.parseLong(firstHit.get("id")));
117. user.setName(firstHit.get("name"));
118.
user.setSex(firstHit.get("sex"));
119. user.setDosomething(firstHit.get("dosometing"));
120.
user.setEmail(firstHit.get("email"));
121. user.setContent(firstHit.get("content"));
122.
userList.add(user);
123.
124.
// System.out.println("name:" + firstHit.get("name"));
125. // System.out.println("sex:" + firstHit.get("sex"));
126.
// System.out.println("dosomething:" + firstHit.get("dosometing"));
127. }
128.
reader.close();
129. } catch (CorruptIndexException e1) {
130.
// TODO Auto-generated catch block
131. e1.printStackTrace();
132.
} catch (IOException e1) {
133. // TODO Auto-generated catch block
134.
e1.printStackTrace();
135. } catch (ParseException e) {
136.
// TODO Auto-generated catch block
137. e.printStackTrace();
138.
}
139.
140.
System.out.println("*****************检索结束**********************");
141. return userList;
142.
}
143.
144.
}
[java] view
plain copy
1. package com.lucene;
2.
3. import java.io.IOException;
4.
import java.util.List;
5.
6.
import javax.servlet.ServletException;
7. import javax.servlet.http.HttpServlet;
8.
import javax.servlet.http.HttpServletRequest;
9. import javax.servlet.http.HttpServletResponse;
10.
11. import com.lucene.index.searchIndex;
12.
import com.lucene.vo.User;
13.
14.
/**
15. * Servlet implementation class searchServlet
16.
*/
17. public class searchServlet extends HttpServlet {
18.
private static final long serialVersionUID = 1L;
19.
20.
/**
21. * Default constructor.
22.
*/
23. public searchServlet() {
24.
// TODO Auto-generated constructor stub
25. }
26.
27. /**
28.
* @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
29. */
30.
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
31. // TODO Auto-generated method stub
32.
}
33.
34.
/**
35. * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
36.
*/
37. protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
38.
request.setCharacterEncoding("UTF-8");
39. String keyword = request.getParameter("keyword");
40.
if("".equals(keyword)){
41. keyword="0";
42.
}
43. searchIndex si = new searchIndex();
44.
si.index();
45. List<User> userList = si.search(keyword);
46.
request.setAttribute("userList", userList);
47. request.getRequestDispatcher("search.jsp").forward(request, response);
48.
}
49.
50.
}
[java] view
plain copy
1. package com.lucene.vo;
2.
/**
 * Plain value object for one search hit shown on the result page.
 * All properties are optional and default to {@code null}.
 */
public class User {
    private Long id;
    private String name;
    private String sex;
    private String dosomething;
    private String email;
    private String content;

    /** @return the record id, or null if not set */
    public Long getId() {
        return id;
    }

    /** @param id the record id */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the user name, or null if not set */
    public String getName() {
        return name;
    }

    /** @param name the user name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the sex, or null if not set */
    public String getSex() {
        return sex;
    }

    /** @param sex the sex */
    public void setSex(String sex) {
        this.sex = sex;
    }

    /** @return the hobby text, or null if not set */
    public String getDosomething() {
        return dosomething;
    }

    /** @param dosomething the hobby text */
    public void setDosomething(String dosomething) {
        this.dosomething = dosomething;
    }

    /** @return the e-mail address, or null if not set */
    public String getEmail() {
        return email;
    }

    /** @param email the e-mail address */
    public void setEmail(String email) {
        this.email = email;
    }

    /** @return the body text, or null if not set */
    public String getContent() {
        return content;
    }

    /** @param content the body text */
    public void setContent(String content) {
        this.content = content;
    }
}
[html] view
plain copy
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%-- Search page: posts the keyword to searchServlet.do and renders the "userList" request attribute when it is not empty. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<%-- Must agree with the UTF-8 charset declared in the page directive. --%>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>lucene 全文检索</title>
</head>
<body style="text-align: center;">
	<form action="searchServlet.do" method="post">
		<input type="text" name="keyword" /> <input type="submit" value="搜索" />
	</form>
	<div style="height: 10px">
	</div>
	<c:if test="${not empty userList}">
		<div>相关信息:</div>
		<table border="1" align="center">
			<tr>
				<td>ID</td>
				<td>姓名</td>
				<td>性别</td>
				<td>邮箱</td>
				<td>爱好</td>
				<td>正文</td>
			</tr>
			<c:forEach items="${ userList}" var="user">
				<tr>
					<%-- c:out escapes markup characters in the indexed values. --%>
					<td><c:out value="${user.id }" /></td>
					<td><c:out value="${user.name }" /></td>
					<td><c:out value="${user.sex }" /></td>
					<td><c:out value="${user.email }" /></td>
					<td><c:out value="${user.dosomething }" /></td>
					<td><c:out value="${user.content }" /></td>
				</tr>
			</c:forEach>
		</table>
	</c:if>
</body>
</html>
代码测试:
相关文章推荐
- 搜狗百度360市值齐跌:搜索引擎们陷入集体焦虑?
- 本人即将筹备败家日志,敬请期待!
- IE:使用搜索助手
- PostgreSQL教程(八):索引详解
- Oracle外键不加索引引起死锁示例
- oracle 索引的相关介绍(创建、简介、技巧、怎样查看) .
- 用SQL建立索引的方法步骤
- SQL2005重新生成索引的的存储过程 sp_rebuild
- SQL效率提升之一些SQL编写建议并有效利用索引
- SQLSERVER的非聚集索引结构深度理解
- SQL Server误区30日谈 第8天 有关对索引进行在线操作的误区
- SQL Server 索引介绍
- SqlServer 索引自动优化工具
- mysql 中存在null和空时创建唯一索引的方法
- 详解sqlserver查询表索引
- 优化 SQL Server 索引的小技巧
- sqlserver 索引的一些总结
- MySQL下使用Inplace和Online方式创建索引的教程
- mysql中索引使用不当速度比没加索引还慢的测试
- Sql Server 查询性能优化之走出索引的误区分析