您的位置：首页 > 其它

Lucene的增删查改

2016-07-04 00:06 357 查看

package com.qianyan.lucene;

import java.io.File;

import java.io.IOException;

import org.apache.commons.io.FileUtils;

import org.apache.lucene.analysis.TokenStream;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.Field.Store;

import org.apache.lucene.document.LongField;

import org.apache.lucene.document.StoredField;

import org.apache.lucene.document.TextField;

import org.apache.lucene.index.DirectoryReader;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.Term;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TermQuery;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

import org.junit.Test;

import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Walk-through of basic Lucene operations (create, query, add, delete) written as
 * JUnit tests against a file-system index.
 *
 * <p>Requires JDK 1.7 or later (the code relies on try-with-resources).
 *
 * <p>Every {@link IndexWriter}, {@link IndexReader}, {@link Directory} and
 * {@link TokenStream} opened here is closed with try-with-resources so that the
 * index write lock is released even when a test fails part-way through.
 *
 * @author user
 */
public class LuceneManager {

    /** Location of the on-disk index used by every test in this class. */
    private static final String INDEX_PATH = "d:\\LuceneDBDB\\indigo";

    /** Directory whose regular files (song lyrics) are fed into the index. */
    private static final String LYRICS_PATH = "D:\\Lucene\\歌词";

    /**
     * Indexes every lyric file using {@link StandardAnalyzer}.
     *
     * <p>NOTE(review): the method name looks like a typo for "createDump"; it is kept
     * unchanged because it is the public test name.
     *
     * <p>NOTE(review): this writes to the same index directory as {@link #createIndex()}
     * but with a different analyzer — confirm that mixing both in one index is intended.
     *
     * @throws IOException if the lyrics directory cannot be listed or the index cannot be written
     */
    @Test
    public void creteDump() throws IOException {
        // An in-memory RAMDirectory would be faster, but its content is lost when the
        // process exits; a file-system directory is used so the index persists.
        indexLyricFiles(new IndexWriterConfig(Version.LATEST, new StandardAnalyzer()));
    }

    /**
     * Runs a {@link TermQuery} for the term {@code love} on the {@code filename} field
     * and prints the hit count followed by the stored fields of up to 10 hits.
     *
     * @throws IOException if the index cannot be opened or read
     */
    @Test
    public void queryIndex() throws IOException {
        try (Directory directory = openIndexDirectory();
                IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // A TermQuery is not analyzed: the term has to match an indexed token exactly.
            Query query = new TermQuery(new Term("filename", "love"));
            TopDocs topDocs = indexSearcher.search(query, 10);
            // Total number of matching documents (may exceed the 10 returned below).
            System.out.println(topDocs.totalHits);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                // Print the stored value of each field.
                System.out.println(document.get("filename"));
                System.out.println(document.get("content"));
                System.out.println(document.get("path"));
                System.out.println(document.get("size"));
            }
        }
    }

    /**
     * Tokenizes a sample text with {@link IKAnalyzer} and prints each term together
     * with its start and end offsets.
     *
     * <p>Other analyzers that can be substituted for comparison: {@code StandardAnalyzer},
     * {@code CJKAnalyzer}, {@code SmartChineseAnalyzer}.
     *
     * @throws IOException if the token stream fails
     */
    @Test
    public void testTokenStream() throws IOException {
        try (IKAnalyzer analyzer = new IKAnalyzer();
                TokenStream tokenStream = analyzer.tokenStream("test", "Tokenizer是分词器，负责将reader转换为语汇单元即进行分词，Lucene提供了很多的分词器，也可以使用第三方的分词，比如IKAnalyzer一个中文分词器。\n" +
                        "tokenFilter是分词过滤器，负责对语汇单元进行过滤庖丁解牛不知所云习大大，完美世界tokenFilter可以是一个过滤器链儿，Lucene提供了很多的分词器过滤器，比如大小写转换、去除停用词等。")) {
            // Term text of the current token.
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            // Character offsets of the current token in the input.
            OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
            // The TokenStream contract requires reset() before the first incrementToken().
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println("start->" + offsetAttribute.startOffset());
                System.out.println(charTermAttribute);
                System.out.println("end->" + offsetAttribute.endOffset());
            }
            // The TokenStream contract requires end() after the last token, before close().
            tokenStream.end();
        }
    }

    /**
     * Indexes every lyric file using {@link IKAnalyzer}.
     *
     * @throws IOException if the lyrics directory cannot be listed or the index cannot be written
     */
    @Test
    public void createIndex() throws IOException {
        indexLyricFiles(new IndexWriterConfig(Version.LATEST, new IKAnalyzer()));
    }

    /**
     * Adds one hand-built document to the index. The document carries two values
     * for the {@code content} field.
     *
     * @throws IOException if the index cannot be opened or written
     */
    @Test
    public void addOneDocument() throws IOException {
        try (Directory directory = openIndexDirectory();
                IndexWriter indexWriter = getIndexWriter(directory)) {
            Document document = new Document();
            document.add(new TextField("filename", "我新添加的一个文档的标题", Store.YES));
            document.add(new TextField("content", "这是文档的内容，我就不告诉你是什么", Store.YES));
            document.add(new TextField("content", "使用索引目录和配置管理类创建索引器", Store.YES));
            indexWriter.addDocument(document);
            // Leaving the try block closes the writer. If the writer were to be reused
            // afterwards, indexWriter.commit() would be the call to make instead.
        }
    }

    /**
     * Deletes every document in the index.
     *
     * @throws IOException if the index cannot be opened or written
     */
    @Test
    public void deleteAllIndex() throws IOException {
        try (Directory directory = openIndexDirectory();
                IndexWriter indexWriter = getIndexWriter(directory)) {
            indexWriter.deleteAll();
            indexWriter.commit();
            // The writer is closed on exit so the index write lock is released.
        }
    }

    /**
     * Deletes the documents matched by a {@link TermQuery} for the term {@code love}
     * on the {@code filename} field.
     *
     * @throws IOException if the index cannot be opened or written
     */
    @Test
    public void deleteIndex() throws IOException {
        try (Directory directory = openIndexDirectory();
                IndexWriter indexWriter = getIndexWriter(directory)) {
            Query query = new TermQuery(new Term("filename", "love"));
            indexWriter.deleteDocuments(query);
            indexWriter.commit();
            // The writer is closed on exit so the index write lock is released.
        }
    }

    /**
     * Opens the file-system directory that holds the index.
     *
     * @return a newly opened directory; the caller is responsible for closing it
     * @throws IOException if the directory cannot be opened
     */
    private static Directory openIndexDirectory() throws IOException {
        return FSDirectory.open(new File(INDEX_PATH));
    }

    /**
     * Creates an {@link IndexWriter} on the given directory, configured with
     * {@link IKAnalyzer}.
     *
     * <p>Failures are propagated rather than swallowed, so a caller never receives
     * {@code null}.
     *
     * @param directory the already opened index directory
     * @return a new writer; the caller is responsible for closing it
     * @throws IOException if the writer cannot be created
     */
    private static IndexWriter getIndexWriter(Directory directory) throws IOException {
        // A fresh config per writer: the config object is handed to exactly one IndexWriter.
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new IKAnalyzer());
        return new IndexWriter(directory, config);
    }

    /**
     * Adds one document per regular file found directly inside the lyrics directory.
     * Sub-directories are skipped.
     *
     * @param config writer configuration (carries the analyzer); used for one writer only
     * @throws IOException if the lyrics directory cannot be listed, a file cannot be read,
     *         or the index cannot be written
     */
    private static void indexLyricFiles(IndexWriterConfig config) throws IOException {
        File[] candidates = new File(LYRICS_PATH).listFiles();
        if (candidates == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list lyrics directory: " + LYRICS_PATH);
        }
        try (Directory directory = openIndexDirectory();
                IndexWriter indexWriter = new IndexWriter(directory, config)) {
            for (File candidate : candidates) {
                if (candidate.isFile()) {
                    indexWriter.addDocument(toDocument(candidate));
                }
            }
        }
    }

    /**
     * Builds the Lucene document for one lyric file with the fields
     * {@code filename}, {@code content}, {@code path} and {@code size}.
     *
     * @param file the file to describe
     * @return a document ready to be added to the index
     * @throws IOException if the file content cannot be read
     */
    private static Document toDocument(File file) throws IOException {
        Document document = new Document();
        // File name.
        document.add(new TextField("filename", file.getName(), Store.YES));
        // File content.
        // NOTE(review): readFileToString(File) decodes with the platform default charset;
        // confirm the encoding of the lyric files and pass it explicitly.
        document.add(new TextField("content", FileUtils.readFileToString(file), Store.YES));
        // File path.
        document.add(new StoredField("path", file.getPath()));
        // File size.
        document.add(new LongField("size", FileUtils.sizeOf(file), Store.YES));
        return document;
    }

}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： Lucene 增删查改

相关文章推荐

新的分享

章节导航