Lucene搜索引擎
2016-05-11 00:00
363 查看
摘要: 简单介绍了搜索引擎Lucene
添加需要索引的文件
1、创建索引库IndexWriter
2、根据文件创建文档Document
3、向索引库中写入文档内容
代码如下
package org.lucence;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index in {@code E:/index} from the {@code .txt} files located directly
 * inside {@code E:/dir}.
 *
 * <p>Every text file becomes one document with two stored fields: {@code id}, a running
 * number starting at 1 that is indexed as a single un-tokenized term, and {@code context},
 * the file's text, which is tokenized by {@link SmartChineseAnalyzer}.
 */
public class Create {

    /**
     * Indexes all {@code .txt} files of the source directory into a freshly created index.
     *
     * @param args unused
     * @throws IllegalStateException if the source directory cannot be listed
     * @throws Exception if reading a source file or writing the index fails
     */
    public static void main(String[] args) throws Exception {
        // Directory holding the source data.
        File dirFile = new File("E:/dir");
        System.out.println(dirFile);

        // List the sources before the index is opened, so that a wrong source path is
        // reported before the index directory is touched. listFiles() returns null when
        // the path is not a directory or cannot be read.
        File[] files = dirFile.listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list source directory: " + dirFile);
        }
        // Number of directory entries found (printing the array itself only shows its identity).
        System.out.println(files.length);

        // Directory holding the index.
        File indexFile = new File("E:/index");
        FSDirectory fsDirectory = FSDirectory.open(indexFile);
        try {
            Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_40);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            // CREATE starts a new index instead of appending to an existing one.
            config.setOpenMode(OpenMode.CREATE);
            IndexWriter indexWriter = new IndexWriter(fsDirectory, config);

            boolean committed = false;
            try {
                int cursor = 1;
                for (File file : files) {
                    // Only regular .txt files are indexed; everything else is skipped so that
                    // no empty documents end up in the index.
                    if (!file.isFile() || !file.getName().endsWith(".txt")) {
                        continue;
                    }
                    Document doc = new Document();
                    doc.add(new StringField("id", String.valueOf(cursor++), Store.YES));
                    doc.add(new TextField("context", readFile(file), Store.YES));
                    indexWriter.addDocument(doc);
                }
                indexWriter.commit();
                committed = true;
            } finally {
                if (committed) {
                    indexWriter.close();
                } else {
                    // Discard the partially written index; rollback() also closes the writer.
                    indexWriter.rollback();
                }
            }
        } finally {
            fsDirectory.close();
        }
    }

    /**
     * Reads a whole GBK-encoded text file into a string.
     *
     * @param file the file to read
     * @return the file's text with every line terminated by {@code '\n'}
     * @throws Exception if the file cannot be opened or read
     */
    private static String readFile(File file) throws Exception {
        StringBuilder text = new StringBuilder();
        FileInputStream in = new FileInputStream(file);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "gbk"));
            String line;
            while ((line = br.readLine()) != null) {
                // readLine() strips the line terminator; put one back so the last word of a
                // line is not fused with the first word of the next line.
                text.append(line).append('\n');
            }
        } finally {
            // Closing the underlying stream releases the file handle even when decoding fails.
            in.close();
        }
        return text.toString();
    }
}
索引搜索
1、创建Directory对象,索引文件夹
2、创建IndexSearcher对象,建立查询(参数是由Directory打开的IndexReader对象)
3、创建QueryParser对象(lucene版本,查询Field字段,所用分词器)
4、生成Query对象,由QueryParser对象的parse函数生成(参数是所查的关键字)
5、建立TopDocs对象(IndexSearcher的search函数,参数是Query查询对象和返回结果的最大条数)
6、TopDocs对象数组里存放查询信息
7、关闭IndexReader(Lucene 4.x中IndexSearcher没有close方法,需要关闭的是IndexReader)
代码如下
package org.lucence;
import java.io.File;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
/**
 * Runs a single-term query against the Lucene index stored in {@code E:/index} and prints
 * the best matches.
 */
public class Search {

    /**
     * Searches the {@code context} field for the term {@code "你"} and prints the maximum
     * score, the total hit count and, for each of the top 5 hits, the internal document
     * number followed by the stored {@code context} value.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        File indexFile = new File("E:/index");
        FSDirectory fsDirectory = FSDirectory.open(indexFile);
        try {
            IndexReader indexReader = DirectoryReader.open(fsDirectory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                // A TermQuery is matched against the indexed terms as-is; the query text is
                // not run through an analyzer.
                Query query = new TermQuery(new Term("context", "你"));
                // Ask for at most 5 hits.
                TopDocs docs = indexSearcher.search(query, 5);
                System.out.println(docs.getMaxScore());
                System.out.println(docs.totalHits);
                for (ScoreDoc hit : docs.scoreDocs) {
                    System.out.println(hit.doc);
                    Document document = indexSearcher.doc(hit.doc);
                    System.out.println(document.get("context"));
                }
            } finally {
                // The searcher has nothing to close; the reader owns the open index files.
                indexReader.close();
            }
        } finally {
            fsDirectory.close();
        }
    }
}
添加需要索引的文件
1、创建索引库IndexWriter
2、根据文件创建文档Document
3、向索引库中写入文档内容
代码如下
package org.lucence;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index in {@code E:/index} from the {@code .txt} files located directly
 * inside {@code E:/dir}.
 *
 * <p>Every text file becomes one document with two stored fields: {@code id}, a running
 * number starting at 1 that is indexed as a single un-tokenized term, and {@code context},
 * the file's text, which is tokenized by {@link SmartChineseAnalyzer}.
 */
public class Create {

    /**
     * Indexes all {@code .txt} files of the source directory into a freshly created index.
     *
     * @param args unused
     * @throws IllegalStateException if the source directory cannot be listed
     * @throws Exception if reading a source file or writing the index fails
     */
    public static void main(String[] args) throws Exception {
        // Directory holding the source data.
        File dirFile = new File("E:/dir");
        System.out.println(dirFile);

        // List the sources before the index is opened, so that a wrong source path is
        // reported before the index directory is touched. listFiles() returns null when
        // the path is not a directory or cannot be read.
        File[] files = dirFile.listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list source directory: " + dirFile);
        }
        // Number of directory entries found (printing the array itself only shows its identity).
        System.out.println(files.length);

        // Directory holding the index.
        File indexFile = new File("E:/index");
        FSDirectory fsDirectory = FSDirectory.open(indexFile);
        try {
            Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_40);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            // CREATE starts a new index instead of appending to an existing one.
            config.setOpenMode(OpenMode.CREATE);
            IndexWriter indexWriter = new IndexWriter(fsDirectory, config);

            boolean committed = false;
            try {
                int cursor = 1;
                for (File file : files) {
                    // Only regular .txt files are indexed; everything else is skipped so that
                    // no empty documents end up in the index.
                    if (!file.isFile() || !file.getName().endsWith(".txt")) {
                        continue;
                    }
                    Document doc = new Document();
                    doc.add(new StringField("id", String.valueOf(cursor++), Store.YES));
                    doc.add(new TextField("context", readFile(file), Store.YES));
                    indexWriter.addDocument(doc);
                }
                indexWriter.commit();
                committed = true;
            } finally {
                if (committed) {
                    indexWriter.close();
                } else {
                    // Discard the partially written index; rollback() also closes the writer.
                    indexWriter.rollback();
                }
            }
        } finally {
            fsDirectory.close();
        }
    }

    /**
     * Reads a whole GBK-encoded text file into a string.
     *
     * @param file the file to read
     * @return the file's text with every line terminated by {@code '\n'}
     * @throws Exception if the file cannot be opened or read
     */
    private static String readFile(File file) throws Exception {
        StringBuilder text = new StringBuilder();
        FileInputStream in = new FileInputStream(file);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "gbk"));
            String line;
            while ((line = br.readLine()) != null) {
                // readLine() strips the line terminator; put one back so the last word of a
                // line is not fused with the first word of the next line.
                text.append(line).append('\n');
            }
        } finally {
            // Closing the underlying stream releases the file handle even when decoding fails.
            in.close();
        }
        return text.toString();
    }
}
索引搜索
1、创建Directory对象,索引文件夹
2、创建IndexSearcher对象,建立查询(参数是由Directory打开的IndexReader对象)
3、创建QueryParser对象(lucene版本,查询Field字段,所用分词器)
4、生成Query对象,由QueryParser对象的parse函数生成(参数是所查的关键字)
5、建立TopDocs对象(IndexSearcher的search函数,参数是Query查询对象和返回结果的最大条数)
6、TopDocs对象数组里存放查询信息
7、关闭IndexReader(Lucene 4.x中IndexSearcher没有close方法,需要关闭的是IndexReader)
代码如下
package org.lucence;
import java.io.File;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
/**
 * Runs a single-term query against the Lucene index stored in {@code E:/index} and prints
 * the best matches.
 */
public class Search {

    /**
     * Searches the {@code context} field for the term {@code "你"} and prints the maximum
     * score, the total hit count and, for each of the top 5 hits, the internal document
     * number followed by the stored {@code context} value.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        File indexFile = new File("E:/index");
        FSDirectory fsDirectory = FSDirectory.open(indexFile);
        try {
            IndexReader indexReader = DirectoryReader.open(fsDirectory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                // A TermQuery is matched against the indexed terms as-is; the query text is
                // not run through an analyzer.
                Query query = new TermQuery(new Term("context", "你"));
                // Ask for at most 5 hits.
                TopDocs docs = indexSearcher.search(query, 5);
                System.out.println(docs.getMaxScore());
                System.out.println(docs.totalHits);
                for (ScoreDoc hit : docs.scoreDocs) {
                    System.out.println(hit.doc);
                    Document document = indexSearcher.doc(hit.doc);
                    System.out.println(document.get("context"));
                }
            } finally {
                // The searcher has nothing to close; the reader owns the open index files.
                indexReader.close();
            }
        } finally {
            fsDirectory.close();
        }
    }
}
相关文章推荐
- file限制上传文件的类型
- Hibernate+EhCache配置二级缓存
- 204LinkList
- R语言-常用命令
- 205LinkList
- 206Union
- 207MergeList
- 208MergeList
- BarTender条码条纹变细详解
- 域名和主机的选择对于关键字排名的影响
- 免费使用Windows版本的ZBrush的方法有哪些
- 递归查询用户的发展轨迹
- JavaScript入门
- Java继承
- 实操演练!MathType几个绝妙小技巧!
- MathType的这些公式技巧,你绝对没想到!
- SDWebImage源码研究与学习
- 第二次练车,练了一天倒库
- Xml序列化/Json序列化循环引用问题探索
- 二维码扫描