您的位置:首页 > 其它

Lucene创建索引入门

2016-09-28 23:06 411 查看
最近在学习 Lucene,参考网上的资料写了一个简单的搜索 demo。

项目jar包(原文此处的截图已缺失;从下面代码的 import 可以看出,至少需要 Lucene 3.6 的 lucene-core 和 IK Analyzer):



//索引关键类

package com.lucene.index;

import java.io.File;

import java.io.IOException;

import java.io.StringReader;

import java.util.ArrayList;

import java.util.List;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.TokenStream;

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.CorruptIndexException;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.queryParser.ParseException;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.store.LockObtainFailedException;

import org.apache.lucene.util.Version;

import org.wltea.analyzer.lucene.IKAnalyzer;

import com.lucene.vo.User;

/**

 *  * lucene 检索内存索引 非常简单的例子  *  * @author Administrator  *  

 */

public class searchIndex {

 private String[] ids = { "1", "2", "3", "4", "5", "6" };

 private String[] emails = { "aa@itat.org", "bb@itat.org", "cc@cc.org", "dd@sina.org", "ee@zttc.edu", "ff@itat.org" };

// private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football",

//   "I like football and I like basketball too", "I like movie and swim" };

 private String[] contents = { "创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中", "创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中", "分词器,分词器就是将检索的关键字分割成一组组词组, 它是lucene检索查询的一大特色之一", "这个是分词器拆分最大长度,因为各种不同类型的分词器拆分的字符颗粒细化程度不一样,所以需要设置一个最长的拆分长度",

   "文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询", "I like movie and swim" };

 private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };

 // 创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中。

 private Directory directory = null;

 //IK分词器

 IKAnalyzer analyzer = null;

 public searchIndex() {

  try {

   directory = FSDirectory.open(new File("H:/lucene/index"));

   analyzer = new IKAnalyzer(true);

  } catch (IOException e) {

   // TODO Auto-generated catch block

   e.printStackTrace();

  }

 }

 

 public void index() {

  /*

  * 创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中。

  */

  IndexWriter writer;

  try {

   writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));

   //创建之前先删除

   writer.deleteAll();

   // 创建Document

   // 文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询

 

   Document doc =null;

   

   for(int i=0;i<ids.length;i++){

    doc = new Document();

    doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

    doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));

    doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));

    doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

    writer.addDocument(doc);

   }

   writer.close();

  } catch (CorruptIndexException e) {

   // TODO Auto-generated catch block

   e.printStackTrace();

  } catch (LockObtainFailedException e) {

   // TODO Auto-generated catch block

   e.printStackTrace();

  } catch (IOException e) {

   // TODO Auto-generated catch block

   e.printStackTrace();

  }

 }

 public List<User> search(String keyword) {

  long startTime = System.currentTimeMillis();

  System.out.println("*****************检索开始**********************");

  List<User> userList = new ArrayList<User>();

  IndexReader reader;

  try {

   reader = IndexReader.open(directory);

   

   // 创建IndexSearcher 检索索引的对象,里面要传递上面写入的内存目录对象directory

   IndexSearcher searcher = new IndexSearcher(reader);

   // 根据搜索关键字 封装一个term组合对象,然后封装成Query查询对象

 

   QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);

   Query query = queryParser.parse(keyword);

   

   // 去索引目录中查询,返回的是TopDocs对象,里面存放的就是上面放的document文档对象

   TopDocs rs = searcher.search(query, null, 10);

   long endTime = System.currentTimeMillis();

   System.out.println("总共花费" + (endTime - startTime) + "毫秒,检索到" + rs.totalHits + "条记录。");

   User user = null;

   for (int i = 0; i < rs.scoreDocs.length; i++) {

    // rs.scoreDocs[i].doc 是获取索引中的标志位id, 从0开始记录

    Document firstHit = searcher.doc(rs.scoreDocs[i].doc);

    user = new User();

    user.setId(Long.parseLong(firstHit.get("id")));

    user.setName(firstHit.get("name"));

    user.setSex(firstHit.get("sex"));

    user.setDosomething(firstHit.get("dosometing"));

    user.setEmail(firstHit.get("email"));

    user.setContent(firstHit.get("content"));

    userList.add(user);

//    System.out.println("name:" + firstHit.get("name"));

//    System.out.println("sex:" + firstHit.get("sex"));

//    System.out.println("dosomething:" + firstHit.get("dosometing"));

   }

   reader.close();

  } catch (CorruptIndexException e1) {

   // TODO Auto-generated catch block

   e1.printStackTrace();

  } catch (IOException e1) {

   // TODO Auto-generated catch block

   e1.printStackTrace();

  } catch (ParseException e) {

   // TODO Auto-generated catch block

   e.printStackTrace();

  }

 

  System.out.println("*****************检索结束**********************");

  return userList;

 }

 

}

(********************************************【控制层调用方法】***********************************************************************************

@RequestMapping("/keyword")

    public String luceneINFO(HttpServletRequest request,HttpServletResponse response){

        String keyword = request.getParameter("keyword");

          if("".equals(keyword)){

           keyword="0";

          }

          searchIndex si = new searchIndex();

          si.index();

          List<User> userList = si.search(keyword);

          request.setAttribute("userList", userList);

        return "index";

    }
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: