您的位置:首页 > 产品设计 > UI/UE

Lucene 3.6 中文分词与文件索引

2012-05-12 21:43 267 查看
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.File;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Minimal Lucene 3.x demo: indexes a text file line by line with the IK Chinese
 * analyzer, then runs a fixed keyword query against the resulting on-disk index
 * and prints the hits to standard output.
 */
public class HelloWord {

    /** Analyzer shared by indexing and query parsing so both tokenize text the same way. */
    private static Analyzer luceneAnalyzer = new IKAnalyzer();

    /** Folder in which the on-disk index is created. */
    static String indexpath = "E://index";

    /** Maximum number of hits collected and printed by {@link #search()}. */
    private static final int MAX_HITS = 10;

    /**
     * Builds (or extends) the index and then runs the demo query.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened, written, or searched
     */
    public static void main(String[] args) throws Exception {
        create();
        search();
    }

    /**
     * Indexes every line of the demo text file as its own document.
     *
     * <p>The index is opened with {@code CREATE_OR_APPEND}: a missing index is created,
     * an existing one is extended. Plain {@code APPEND} fails on the very first run
     * because there is no index to append to.
     *
     * <p>Errors while reading the input file are reported and do not abort the run;
     * documents added before the failure are still committed when the writer closes.
     *
     * @throws Exception if the index directory cannot be opened or the writer cannot
     *                   be created or closed
     */
    public static void create() throws Exception {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, luceneAnalyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        FSDirectory directory = FSDirectory.open(new File(indexpath));
        try {
            IndexWriter indexWriter = new IndexWriter(directory, config);
            try {
                indexFile(indexWriter, "e://files//", "我爱的世界.txt");
            } finally {
                // Always close the writer: this commits pending documents and releases
                // the index write lock even when indexing failed part-way through.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Reads one text file and adds a document per line, storing the 1-based line
     * number ("id"), the line text ("content") and the file name ("file_name").
     *
     * @param indexWriter open writer that receives the documents
     * @param filePath    folder containing the file, including the trailing separator
     * @param fileName    name of the file to index
     */
    private static void indexFile(IndexWriter indexWriter, String filePath, String fileName) {
        BufferedReader reader = null;
        try {
            System.out.println(1);
            System.out.println(filePath + fileName);

            File file = new File(filePath + fileName); // the file to be indexed

            // NOTE(review): FileReader decodes with the platform default charset;
            // confirm that this matches the encoding of the input file.
            reader = new BufferedReader(new FileReader(file));

            System.out.println("创建索引开始.....");
            long startedAt = System.currentTimeMillis();

            int line = 1;
            String text;
            while ((text = reader.readLine()) != null) {
                Document doc = new Document();
                doc.add(new Field("id", "" + line, Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("content", text, Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("file_name", fileName, Store.YES, Field.Index.ANALYZED));

                indexWriter.addDocument(doc);
                System.out.println("已创建 【" + line + "】行");
                line++;
            }

            long finishedAt = System.currentTimeMillis();
            System.out.println("创建索引完成!\n");
            System.out.println("创建索引耗时:" + (finishedAt - startedAt) + "ms");
        } catch (IOException e) {
            // Best-effort demo: keep going, but do not hide what actually went wrong.
            System.out.println("Read Error!");
            System.err.println("Failed while indexing " + filePath + fileName + ": " + e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }

    /**
     * Searches the "id" and "content" fields for a fixed keyword and prints the
     * timing, the total hit count and up to {@link #MAX_HITS} matching documents.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public static void search() throws Exception {
        String queryString = "妈妈";
        String[] fields = {"id", "content"};
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, fields, luceneAnalyzer);
        Query query = queryParser.parse(queryString);

        FSDirectory directory = FSDirectory.open(new File(indexpath));
        try {
            IndexReader reader = IndexReader.open(directory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                TopScoreDocCollector results = TopScoreDocCollector.create(MAX_HITS, false);

                long startedAt = System.currentTimeMillis();
                System.out.println("开始查询时间 :" + startedAt);
                System.out.println("查询关键字 : " + queryString);
                searcher.search(query, results);
                long finishedAt = System.currentTimeMillis();
                System.out.println("结束查询时间 :" + finishedAt);
                System.out.println();
                System.out.println("查询耗时 :" + (finishedAt - startedAt) + "ms");

                // Show at most the first MAX_HITS results.
                TopDocs topDocs = results.topDocs(0, MAX_HITS);
                System.out.println("命中数: " + topDocs.totalHits);

                for (int j = 0; j < topDocs.scoreDocs.length; j++) {
                    ScoreDoc scoreDoc = topDocs.scoreDocs[j];
                    Document doc = searcher.doc(scoreDoc.doc);
                    System.out.println(doc);
                    System.out.println("文件名:" + doc.get("file_name"));
                    System.out.println("第 " + doc.get("id") + " 行");
                    System.out.println("内容: " + doc.get("content"));
                }
            } finally {
                // The searcher was built on this reader and does not own it, so the
                // reader has to be closed explicitly to release its file handles.
                reader.close();
            }
        } finally {
            directory.close();
        }
    }
}


 
内容来自用户分享和网络整理,不保证内容的准确性。如有侵权内容,可联系管理员处理(点击这里给我发消息)。