您的位置：首页 > 其它

Lucene创建索引入门案例

2016-07-03 23:34 393 查看

原：http://blog.csdn.net/zwx19921215/article/details/32936395

最近在学习 Lucene，参考网上的资料写了一个简单的搜索 demo。
项目jar包：
//索引关键类
[java] view
plain copy

1. <pre name="code" class="java">package com.lucene.index;
2.

3. import java.io.File;
4.
import java.io.IOException;
5. import java.io.StringReader;
6.
import java.util.ArrayList;
7. import java.util.List;
8.

9. import org.apache.lucene.analysis.Analyzer;
10.
import org.apache.lucene.analysis.TokenStream;
11. import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
12.
import org.apache.lucene.document.Document;
13. import org.apache.lucene.document.Field;
14.
import org.apache.lucene.index.CorruptIndexException;
15. import org.apache.lucene.index.IndexReader;
16.
import org.apache.lucene.index.IndexWriter;
17. import org.apache.lucene.index.IndexWriterConfig;
18.
import org.apache.lucene.queryParser.ParseException;
19. import org.apache.lucene.queryParser.QueryParser;
20.
import org.apache.lucene.search.IndexSearcher;
21. import org.apache.lucene.search.Query;
22.
import org.apache.lucene.search.TopDocs;
23. import org.apache.lucene.store.Directory;
24.
import org.apache.lucene.store.FSDirectory;
25. import org.apache.lucene.store.LockObtainFailedException;
26.
import org.apache.lucene.util.Version;
27. import org.wltea.analyzer.lucene.IKAnalyzer;
28.

29. import com.lucene.vo.User;
30.

31. /**
32.
*  * lucene 检索内存索引非常简单的例子  *  * @author Administrator  *
33. */
34.
public class searchIndex {
35.     private String[] ids = { "1", "2", "3", "4", "5", "6" };
36.
    private String[] emails = { "aa@itat.org", "bb@itat.org", "cc@cc.org", "dd@sina.org", "ee@zttc.edu", "ff@itat.org" };
37. //  private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football",
38.
//          "I like football and I like basketball too", "I like movie and swim" };
39.     private String[] contents = { "创建一个内存目录对象，所以这里生成的索引会放在磁盘中，而不是在内存中", "创建索引写入对象，该对象既可以把索引写入到磁盘中也可以写入到内存中", "分词器，分词器就是将检索的关键字分割成一组组词组，它是lucene检索查询的一大特色之一", "这个是分词器拆分最大长度，因为各种不同类型的分词器拆分的字符颗粒细化程度不一样，所以需要设置一个最长的拆分长度",
40.
            "文档对象，在lucene中创建的索引可以看成数据库中的一张表，表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询", "I like movie and swim" };
41.     private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };
42.
    // 创建一个内存目录对象，所以这里生成的索引会放在磁盘中，而不是在内存中。
43.     private Directory directory = null;
44.
    //IK分词器
45.     IKAnalyzer analyzer = null;
46.
    public searchIndex() {
47.         try {
48.
            directory = FSDirectory.open(new File("H:/lucene/index"));
49.             analyzer = new IKAnalyzer(true);
50.
        } catch (IOException e) {
51.             // TODO Auto-generated catch block
52.
            e.printStackTrace();
53.         }
54.
    }
55.
56.
    public void index() {
57.         /*
58.
         * 创建索引写入对象，该对象既可以把索引写入到磁盘中也可以写入到内存中。
59.          */
60.
        IndexWriter writer;
61.         try {
62.
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
63.             //创建之前先删除
64.
            writer.deleteAll();
65.             // 创建Document
66.
            // 文档对象，在lucene中创建的索引可以看成数据库中的一张表，表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询
67.
68.
            Document doc =null;
69.
70.
            for(int i=0;i<ids.length;i++){
71.                 doc = new Document();
72.
                doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
73.                 doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
74.
                doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
75.                 doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
76.
                writer.addDocument(doc);
77.             }
78.
            writer.close();
79.         } catch (CorruptIndexException e) {
80.
            // TODO Auto-generated catch block
81.             e.printStackTrace();
82.
        } catch (LockObtainFailedException e) {
83.             // TODO Auto-generated catch block
84.
            e.printStackTrace();
85.         } catch (IOException e) {
86.
            // TODO Auto-generated catch block
87.             e.printStackTrace();
88.
        }
89.     }
90.

91.     public List<User> search(String keyword) {
92.
        long startTime = System.currentTimeMillis();
93.         System.out.println("*****************检索开始**********************");
94.
        List<User> userList = new ArrayList<User>();
95.         IndexReader reader;
96.
        try {
97.             reader = IndexReader.open(directory);
98.

99.             // 创建IndexSearcher 检索索引的对象，里面要传递上面写入的内存目录对象directory
100.
            IndexSearcher searcher = new IndexSearcher(reader);
101.             // 根据搜索关键字封装一个term组合对象，然后封装成Query查询对象
102.

103.             QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
104.
            Query query = queryParser.parse(keyword);
105.
106.

107.             // 去索引目录中查询，返回的是TopDocs对象，里面存放的就是上面放的document文档对象
108.
            TopDocs rs = searcher.search(query, null, 10);
109.             long endTime = System.currentTimeMillis();
110.
            System.out.println("总共花费" + (endTime - startTime) + "毫秒，检索到" + rs.totalHits + "条记录。");
111.             User user = null;
112.
            for (int i = 0; i < rs.scoreDocs.length; i++) {
113.                 // rs.scoreDocs[i].doc 是获取索引中的标志位id, 从0开始记录
114.
                Document firstHit = searcher.doc(rs.scoreDocs[i].doc);
115.                 user = new User();
116.
                user.setId(Long.parseLong(firstHit.get("id")));
117.                 user.setName(firstHit.get("name"));
118.
                user.setSex(firstHit.get("sex"));
119.                 user.setDosomething(firstHit.get("dosometing"));
120.
                user.setEmail(firstHit.get("email"));
121.                 user.setContent(firstHit.get("content"));
122.
                userList.add(user);
123.
124.
//              System.out.println("name:" + firstHit.get("name"));
125. //              System.out.println("sex:" + firstHit.get("sex"));
126.
//              System.out.println("dosomething:" + firstHit.get("dosometing"));
127.             }
128.
            reader.close();
129.         } catch (CorruptIndexException e1) {
130.
            // TODO Auto-generated catch block
131.             e1.printStackTrace();
132.
        } catch (IOException e1) {
133.             // TODO Auto-generated catch block
134.
            e1.printStackTrace();
135.         } catch (ParseException e) {
136.
            // TODO Auto-generated catch block
137.             e.printStackTrace();
138.
        }
139.
140.
        System.out.println("*****************检索结束**********************");
141.         return userList;
142.
    }
143.
144.
}

[java] view
plain copy

1. package com.lucene;
2.

3. import java.io.IOException;
4.
import java.util.List;
5.
6.
import javax.servlet.ServletException;
7. import javax.servlet.http.HttpServlet;
8.
import javax.servlet.http.HttpServletRequest;
9. import javax.servlet.http.HttpServletResponse;
10.

11. import com.lucene.index.searchIndex;
12.
import com.lucene.vo.User;
13.
14.
/**
15. * Servlet implementation class searchServlet
16.
*/
17. public class searchServlet extends HttpServlet {
18.
    private static final long serialVersionUID = 1L;
19.
20.
    /**
21.      * Default constructor.
22.
     */
23.     public searchServlet() {
24.
        // TODO Auto-generated constructor stub
25.     }
26.

27.     /**
28.
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
29.      */
30.
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
31.         // TODO Auto-generated method stub
32.
    }
33.
34.
    /**
35.      * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
36.
     */
37.     protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
38.
        request.setCharacterEncoding("UTF-8");
39.         String keyword = request.getParameter("keyword");
40.
        if("".equals(keyword)){
41.             keyword="0";
42.
        }
43.         searchIndex si = new searchIndex();
44.
        si.index();
45.         List<User> userList = si.search(keyword);
46.
        request.setAttribute("userList", userList);
47.         request.getRequestDispatcher("search.jsp").forward(request, response);
48.
    }
49.
50.
}

[java] view
plain copy

1. package com.lucene.vo;
2.

/**
 * Plain value object for one search hit shown on the result page.
 * All properties are optional and default to {@code null}.
 */
public class User {
    private Long id;
    private String name;
    private String sex;
    private String dosomething;
    private String email;
    private String content;

    /** @return the id, or {@code null} if unset */
    public Long getId() {
        return id;
    }

    /** @param id the id to set */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the name, or {@code null} if unset */
    public String getName() {
        return name;
    }

    /** @param name the name to set */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the sex, or {@code null} if unset */
    public String getSex() {
        return sex;
    }

    /** @param sex the sex to set */
    public void setSex(String sex) {
        this.sex = sex;
    }

    /** @return the "dosomething" value, or {@code null} if unset */
    public String getDosomething() {
        return dosomething;
    }

    /** @param dosomething the "dosomething" value to set */
    public void setDosomething(String dosomething) {
        this.dosomething = dosomething;
    }

    /** @return the email address, or {@code null} if unset */
    public String getEmail() {
        return email;
    }

    /** @param email the email address to set */
    public void setEmail(String email) {
        this.email = email;
    }

    /** @return the content text, or {@code null} if unset */
    public String getContent() {
        return content;
    }

    /** @param content the content text to set */
    public void setContent(String content) {
        this.content = content;
    }
}

[html] view
plain copy

<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%-- Search page: posts a keyword to searchServlet.do and lists the "userList" request attribute when it is not empty. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<%-- The meta charset matches the UTF-8 page encoding declared in the page directive. --%>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>lucene 全文检索</title>
</head>
<body style="text-align: center;">
    <form action="searchServlet.do" method="post">
        <input type="text" name="keyword" /> <input type="submit" value="搜索" />
    </form>
    <div style="height: 10px">
    </div>
    <c:if test="${not empty userList}">
        <div>相关信息：</div>
        <table border="1" align="center">
            <tr>
                <td>ID</td>
                <td>姓名</td>
                <td>性别</td>
                <td>邮箱</td>
                <td>爱好</td>
                <td>正文</td>
            </tr>
            <c:forEach items="${ userList}" var="user">
                <tr>
                    <td>${user.id }</td>
                    <td>${user.name }</td>
                    <td>${user.sex }</td>
                    <td>${user.email }</td>
                    <td>${user.dosomething }</td>
                    <td>${user.content }</td>
                </tr>
            </c:forEach>
        </table>
    </c:if>
</body>
</html>

代码测试：

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： 索引搜索 Lucene

相关文章推荐

新的分享

章节导航