您的位置:首页 > 其它

Lucene 创建索引(index)与搜索

2015-08-13 11:29 225 查看
package com.my.lucene.index;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 3.0 example that builds a file-system index from a folder of
 * documents and runs a fixed query against it.
 *
 * <p>Both the document folder ({@code d:\lucene\doc}) and the index folder
 * ({@code d:\lucene\index}) are hard-coded.
 */
public class TestIndex {

    /**
     * Creates (or overwrites) the index in {@code d:\lucene\index} from every
     * regular file found directly inside {@code d:\lucene\doc}.
     *
     * <p>Each file becomes one document with three fields: {@code content}
     * (read from the file), {@code name} (file name) and {@code path}
     * (absolute path). I/O failures are reported on standard error and are
     * not propagated to the caller.
     */
    public void index() {
        // Folder holding the documents to be indexed.
        File path = new File("d:\\lucene\\doc");

        // listFiles() returns null when the folder is missing or unreadable.
        // Check before opening the writer: it is opened with create=true and
        // would otherwise wipe an existing index for nothing.
        File[] sources = path.listFiles();
        if (sources == null) {
            System.err.println("Cannot list documents in "
                    + path.getAbsolutePath());
            return;
        }

        Directory indexpath = null;
        IndexWriter writer = null;
        try {
            // 1. Open the Directory where the index is stored.
            indexpath = FSDirectory.open(new File("d:\\lucene\\index"));

            // 2. Create the IndexWriter: target directory, analyzer,
            //    create=true (start a fresh index), field length limit.
            writer = new IndexWriter(indexpath, new StandardAnalyzer(
                    Version.LUCENE_30), true, MaxFieldLength.LIMITED);

            // 3. Build one Document per source file.
            for (File file : sources) {
                // Sub-directories cannot be opened with FileReader; skip them
                // instead of aborting the whole run.
                if (!file.isFile()) {
                    continue;
                }

                Document doc = new Document();
                // Fields are key/value pairs. "content" is fed from a Reader.
                // NOTE(review): the reader is handed over to Lucene and not
                // closed here; confirm the indexing chain closes it.
                doc.add(new Field("content", new FileReader(file)));
                doc.add(new Field("name", file.getName(), Store.YES,
                        Index.NOT_ANALYZED));
                doc.add(new Field("path", file.getAbsolutePath(),
                        Field.Store.YES, Field.Index.NOT_ANALYZED));

                // 4. Add the document to the index.
                writer.addDocument(doc);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Either resource may still be null if opening it failed.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (indexpath != null) {
                try {
                    indexpath.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the {@code content} field of the index in
     * {@code d:\lucene\index} for the term {@code java} and prints the stored
     * {@code name} and {@code path} of up to ten hits to standard output.
     *
     * @throws Exception if the index cannot be opened or read, or the query
     *                   cannot be parsed
     */
    public void search() throws Exception {
        // 1. Open the Directory to search: the same path the index was
        //    written to.
        Directory indexpath = FSDirectory.open(new File("d:\\lucene\\index"));
        try {
            // 2. Open an IndexReader on the index.
            IndexReader reader = IndexReader.open(indexpath);
            try {
                // 3. Create the IndexSearcher on top of the reader.
                IndexSearcher search = new IndexSearcher(reader);
                try {
                    // 4. Build the query. Parameters: version, default field
                    //    (the key used in Field("key", ...) at index time)
                    //    and the analyzer.
                    QueryParser parser = new QueryParser(Version.LUCENE_30,
                            "content",
                            new StandardAnalyzer(Version.LUCENE_30));
                    // The argument to parse() is the text to search for.
                    Query query = parser.parse("java");

                    // 5. Run the search, asking for at most 10 hits.
                    TopDocs docs = search.search(query, 10);

                    // 6. Each ScoreDoc carries the id of a matching document.
                    for (ScoreDoc sd : docs.scoreDocs) {
                        // 7. Load the document by its id.
                        Document d = search.doc(sd.doc);
                        // 8. Read stored values by field key.
                        System.out.println(d.get("name"));
                        System.out.println(d.get("path"));
                    }
                } finally {
                    search.close();
                }
            } finally {
                // Closed explicitly: the searcher was built on a
                // caller-supplied reader.
                reader.close();
            }
        } finally {
            indexpath.close();
        }
    }

}


测试代码:

package com.my.lucene.test;

import org.junit.Test;

import com.my.lucene.index.TestIndex;

/**
 * JUnit driver for {@link TestIndex}.
 */
public class MainTest {

    /**
     * Runs the search step only. The index step is left disabled below;
     * enable it when the index has to be (re)built before searching.
     *
     * @throws Exception if {@link TestIndex#search()} fails
     */
    @Test
    public void TestIndexJunit() throws Exception {
        // new TestIndex().index();
        new TestIndex().search();
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: