Lucene的简单使用
2015-07-02 11:36
302 查看
Lucene的简单使用:进行检索之前,必须先建立好索引,才能检索到需要的内容。所以使用Lucene大体分为两个方面:建立索引和检索,其次就是一些不同的策略了!
1.建立索引
Java代码
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index from the regular files found directly inside a data directory.
 *
 * <p>Each file becomes one {@link Document} with two stored, analyzed fields:
 * {@code fileName} (the file's name) and {@code content} (the file's text).
 */
public class Writer {
    /**
     * Indexes every regular file in the data directory and prints how many documents
     * the index holds afterwards.
     *
     * @param args unused
     * @throws IOException if the data directory cannot be listed, a file cannot be read,
     *         or the index cannot be written
     */
    public static void main(String args[]) throws IOException {
        String indexDir = "E:\\index\\test"; // where the index is stored
        String dataDir = "D:\\Backup\\Note"; // data source: every file in here is indexed as one document
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            // Analyzer matching the Lucene version in use.
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
            // Holds the index-writing configuration; left at its defaults here.
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, analyzer);
            IndexWriter indexWriter = new IndexWriter(dir, config);
            try {
                File files[] = new File(dataDir).listFiles();
                if (files == null) {
                    // listFiles() returns null when the path is not a directory or cannot be read.
                    throw new IOException("Cannot list data directory: " + dataDir);
                }
                for (File file : files) {
                    if (!file.isFile()) {
                        // Sub-directories cannot be opened as a FileInputStream; skip them.
                        continue;
                    }
                    // Each information source is handled as one Document.
                    Document doc = new Document();
                    doc.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.add(new Field("content", readContent(file), Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.addDocument(doc);
                }
                // Optimize once after all documents are added instead of once per document.
                indexWriter.optimize();
                int docs = indexWriter.numDocs();
                System.out.println("共索引了"+docs+"个文件!");
            } finally {
                // Always close the writer so the index write lock is released, even on failure.
                indexWriter.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Reads the whole file into a String.
     *
     * <p>Loops until the buffer is full or end of stream is reached, because a single
     * {@code read} call (and {@code available()}) is not guaranteed to cover the whole file.
     *
     * <p>NOTE: bytes are decoded with the platform default charset, as in the original code.
     *
     * @param file the file to read
     * @return the file's content as text
     * @throws IOException if the file is too large for one buffer or cannot be read
     */
    private static String readContent(File file) throws IOException {
        long size = file.length();
        if (size > Integer.MAX_VALUE) {
            throw new IOException("File too large to index in one piece: " + file);
        }
        byte[] bytes = new byte[(int) size];
        int filled = 0;
        FileInputStream fis = new FileInputStream(file);
        try {
            while (filled < bytes.length) {
                int n = fis.read(bytes, filled, bytes.length - filled);
                if (n < 0) {
                    break; // file shrank while being read
                }
                filled += n;
            }
        } finally {
            fis.close();
        }
        return new String(bytes, 0, filled);
    }
}
2.进行检索
Java代码
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Searches the Lucene index built by the indexing example and prints one page of hits.
 *
 * <p>The query is parsed against both the {@code fileName} and {@code content} fields.
 */
public class Searcher {
    /**
     * Runs a fixed query against the index and prints the total hit count followed by
     * the file name and score of each hit on the requested page.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String args[]) throws IOException, ParseException {
        String indexDir = "E:\\index\\test";
        int start = 2;    // zero-based offset of the first hit to return
        int pageSize = 2; // number of hits to return per page
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher indexSearcher = new IndexSearcher(dir);
            try {
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
                // Single-field alternative: new QueryParser(Version.LUCENE_31, "content", analyzer)
                QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_31, new String[] {"fileName","content"}, analyzer);
                Query query = queryParser.parse("内 的 值");
                // The collector always gathers hits from the first one, so it must hold
                // start + pageSize hits for the requested page to be complete.
                TopScoreDocCollector collector = TopScoreDocCollector.create(start + pageSize, true);
                indexSearcher.search(query, collector);
                TopDocs topDocs = collector.topDocs(start, pageSize);
                // Without paging: TopDocs topDocs = indexSearcher.search(query, 10);
                int totalHits = topDocs.totalHits;
                System.out.println("totalHits:"+totalHits);
                System.out.println();
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document doc = indexSearcher.doc(scoreDoc.doc);
                    System.out.println("fileName:"+doc.get("fileName"));
                    // The stored text is also available via doc.get("content").
                    System.out.println("score:"+scoreDoc.score);
                    System.out.println();
                }
            } finally {
                // Close the searcher even when parsing or searching fails.
                indexSearcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
1.建立索引
Java代码
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index from the regular files found directly inside a data directory.
 *
 * <p>Each file becomes one {@link Document} with two stored, analyzed fields:
 * {@code fileName} (the file's name) and {@code content} (the file's text).
 */
public class Writer {
    /**
     * Indexes every regular file in the data directory and prints how many documents
     * the index holds afterwards.
     *
     * @param args unused
     * @throws IOException if the data directory cannot be listed, a file cannot be read,
     *         or the index cannot be written
     */
    public static void main(String args[]) throws IOException {
        String indexDir = "E:\\index\\test"; // where the index is stored
        String dataDir = "D:\\Backup\\Note"; // data source: every file in here is indexed as one document
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            // Analyzer matching the Lucene version in use.
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
            // Holds the index-writing configuration; left at its defaults here.
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, analyzer);
            IndexWriter indexWriter = new IndexWriter(dir, config);
            try {
                File files[] = new File(dataDir).listFiles();
                if (files == null) {
                    // listFiles() returns null when the path is not a directory or cannot be read.
                    throw new IOException("Cannot list data directory: " + dataDir);
                }
                for (File file : files) {
                    if (!file.isFile()) {
                        // Sub-directories cannot be opened as a FileInputStream; skip them.
                        continue;
                    }
                    // Each information source is handled as one Document.
                    Document doc = new Document();
                    doc.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.add(new Field("content", readContent(file), Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.addDocument(doc);
                }
                // Optimize once after all documents are added instead of once per document.
                indexWriter.optimize();
                int docs = indexWriter.numDocs();
                System.out.println("共索引了"+docs+"个文件!");
            } finally {
                // Always close the writer so the index write lock is released, even on failure.
                indexWriter.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Reads the whole file into a String.
     *
     * <p>Loops until the buffer is full or end of stream is reached, because a single
     * {@code read} call (and {@code available()}) is not guaranteed to cover the whole file.
     *
     * <p>NOTE: bytes are decoded with the platform default charset, as in the original code.
     *
     * @param file the file to read
     * @return the file's content as text
     * @throws IOException if the file is too large for one buffer or cannot be read
     */
    private static String readContent(File file) throws IOException {
        long size = file.length();
        if (size > Integer.MAX_VALUE) {
            throw new IOException("File too large to index in one piece: " + file);
        }
        byte[] bytes = new byte[(int) size];
        int filled = 0;
        FileInputStream fis = new FileInputStream(file);
        try {
            while (filled < bytes.length) {
                int n = fis.read(bytes, filled, bytes.length - filled);
                if (n < 0) {
                    break; // file shrank while being read
                }
                filled += n;
            }
        } finally {
            fis.close();
        }
        return new String(bytes, 0, filled);
    }
}
2.进行检索
Java代码
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Searches the Lucene index built by the indexing example and prints one page of hits.
 *
 * <p>The query is parsed against both the {@code fileName} and {@code content} fields.
 */
public class Searcher {
    /**
     * Runs a fixed query against the index and prints the total hit count followed by
     * the file name and score of each hit on the requested page.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String args[]) throws IOException, ParseException {
        String indexDir = "E:\\index\\test";
        int start = 2;    // zero-based offset of the first hit to return
        int pageSize = 2; // number of hits to return per page
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher indexSearcher = new IndexSearcher(dir);
            try {
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
                // Single-field alternative: new QueryParser(Version.LUCENE_31, "content", analyzer)
                QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_31, new String[] {"fileName","content"}, analyzer);
                Query query = queryParser.parse("内 的 值");
                // The collector always gathers hits from the first one, so it must hold
                // start + pageSize hits for the requested page to be complete.
                TopScoreDocCollector collector = TopScoreDocCollector.create(start + pageSize, true);
                indexSearcher.search(query, collector);
                TopDocs topDocs = collector.topDocs(start, pageSize);
                // Without paging: TopDocs topDocs = indexSearcher.search(query, 10);
                int totalHits = topDocs.totalHits;
                System.out.println("totalHits:"+totalHits);
                System.out.println();
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document doc = indexSearcher.doc(scoreDoc.doc);
                    System.out.println("fileName:"+doc.get("fileName"));
                    // The stored text is also available via doc.get("content").
                    System.out.println("score:"+scoreDoc.score);
                    System.out.println();
                }
            } finally {
                // Close the searcher even when parsing or searching fails.
                indexSearcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
相关文章推荐
- 向Genymotion中添加文件时出现 Failed to push the item(s).错误
- ****php redis 的使用方法
- 七牛整合PHP上传文件
- mongodb 的备份恢复导入与导出
- Centos7 + Windows7 双系统
- Android成长日记-Android四大组件之Service组件的学习
- MySQL通过视图(或临时表)实现动态SQL(游标)
- Lucene4.10使用教程(一):常用概念
- [leetcode][桶排序] Maximum Gap
- 谷歌掐架甲骨文:揭秘Java侵权案始末
- Uva 10891 Game of Sum(区间博弈dp)
- MongoDB Windows环境安装及配置
- BaiduMap---百度地图官方Demo之离线地图功能(介绍如何下载和使用离线地图)
- Practical Advice on Running uClinux on Cortex-M3/M4
- php数组函数
- Xamarin.Android开发实践(四)
- 资讯:爱加密林魏:CTO应该这样做!
- spring mvc拦截器和<mvc:annotation-driven />的详解
- [LeetCode] Kth Smallest Element in a BST
- MySQL分页技术、6种分页方法总结