您的位置:首页 > 编程语言 > Java开发

自己动手写搜索引擎(常搜吧历程三#搜索二#)(Java、Lucene、hadoop)

2013-03-17 21:45 609 查看
接着上一节搜索一,我们来深入学习Lucene下搜索的其他功能。

Lucene分页搜索的实现:

package com.qianyan.lucene;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates paged display of Lucene search results.
 *
 * <p>Runs a {@link TermQuery} for the term "ontology" in the "contents" field,
 * asks the searcher for at most {@link #MAX_HITS} top hits, and prints the
 * score and stored "contents" value of every hit that falls on the requested
 * page.
 */
public class TestSeacher3 {

    /** Maximum number of top hits requested from the searcher. */
    private static final int MAX_HITS = 126;

    /**
     * Opens the index under {@code E:/luceneindex}, runs the query and prints
     * one page of results.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "E:/luceneindex";
        int eachPageNumber = 10;    // number of records shown per page
        int pageNumber = 3;         // current page (1-based)

        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                TermQuery query = new TermQuery(new Term("contents", "ontology"));
                TopDocs topDocs = searcher.search(query, MAX_HITS);
                ScoreDoc[] hits = topDocs.scoreDocs;

                // Clamp the page window to the hits actually returned: the
                // query may match fewer documents than the page would need,
                // and indexing past hits.length would throw.
                int first = (pageNumber - 1) * eachPageNumber;
                int end = Math.min(pageNumber * eachPageNumber, hits.length);

                for (int i = first; i < end; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(hits[i].score);
                    System.out.println(doc.get("contents"));
                }
            } finally {
                // Release the searcher even when the search or a document load fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}


搜索中使用RAMDirectory:

package com.qianyan.lucene;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Demonstrates indexing into, and searching from, an in-memory
 * {@link RAMDirectory}.
 *
 * <p>{@link #createRamIndex()} writes four small person records into the
 * directory; {@link #searchRam()} then looks one of them up by name and
 * prints its stored fields.
 */
public class TestRamSearch {

    /** In-memory index shared by the indexing and the searching step. */
    Directory directory = new RAMDirectory();

    /**
     * Builds the in-memory index: one document per person with the stored,
     * analyzed fields "id", "name", "address" and "birthday".
     *
     * @throws IOException if writing to the index fails
     */
    public void createRamIndex() throws IOException {
        String[] ids = {"1", "2", "3", "4"};
        String[] names = {"zhangsan", "lisi", "wangwu", "zhaoliu"};
        String[] addresses = {"shanghai", "beijing", "guangzhou", "nanjing"};
        String[] birthdays = {"19820720", "19840203", "19770409", "19830130"};

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        try {
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.ANALYZED));
                document.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED));
                document.add(new Field("address", addresses[i], Field.Store.YES, Field.Index.ANALYZED));
                document.add(new Field("birthday", birthdays[i], Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(document);
            }
            writer.optimize();
        } finally {
            // Close the writer even if adding a document or optimizing fails.
            writer.close();
        }
    }

    /**
     * Searches the in-memory index for the name "zhangsan" and prints the
     * id, name, address and birthday of every hit on one line each.
     *
     * <p>The directory is closed when this method finishes, exactly as in the
     * original flow, so it is meant to be the last operation on this instance.
     *
     * @throws IOException if reading from the index fails
     */
    public void searchRam() throws IOException {
        try {
            IndexSearcher searcher = new IndexSearcher(directory);
            try {
                TermQuery query = new TermQuery(new Term("name", "zhangsan"));
                TopDocs topDocs = searcher.search(query, 126);
                ScoreDoc[] hits = topDocs.scoreDocs;

                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    //System.out.println(hits[i].score);
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Release the searcher even when the search itself fails.
                searcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Builds the in-memory index and then runs the sample search against it.
     *
     * @param args unused
     * @throws IOException if indexing or searching fails
     */
    public static void main(String[] args) throws IOException {
        TestRamSearch trs = new TestRamSearch();
        trs.createRamIndex();
        trs.searchRam();
    }
}


QueryParser的使用:

1、默认字段的检索

2、在指定字段中检索

3、在默认字段进行“或”检索

4、对默认字段进行“与”检索

5、AND NOT 的检索

6、默认字段中前缀检索

7、对短语进行检索

package com.qianyan.lucene;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates the {@link QueryParser} query syntax against the index under
 * {@code E:/luceneindex}.
 *
 * <p>One sample query is active; the commented-out alternatives show the
 * other syntax forms (phrase, range, fuzzy, OR, AND, NOT). Swap the active
 * line to try them.
 */
public class TestQueryParser {

    /**
     * Parses the sample query, runs it and prints the id, name, address and
     * birthday of each of the (at most 100) collected hits.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     * @throws ParseException if the query string is not valid query syntax
     */
    public static void main(String[] args) throws IOException, ParseException {
        Analyzer analyzer = new StandardAnalyzer();
        String indexDir = "E:/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                // "address" is the default field: terms without a "field:" prefix are searched there.
                QueryParser parser = new QueryParser("address", analyzer);
                //Query query = parser.parse("address:\"resides in\"");    // phrase search: the phrase must be wrapped in double quotes
                //Query query = parser.parse("birthday:[19820720 TO 19840203]");    // range search: square brackets include both ends, curly braces exclude them
                //Query query = parser.parse("zhangsan~");    // fuzzy search (a trailing "~"), not a prefix search
                //Query query = parser.parse("shanghai beijing");    // OR forms: 1. whitespace: shanghai beijing  2. ||: shanghai || beijing  3. upper-case keyword: shanghai OR beijing
                //Query query = parser.parse("shanghai AND beijing");    // AND forms: 1. + on each term: +shanghai +beijing  2. &&: shanghai && beijing  3. upper-case keyword: shanghai AND beijing
                //Query query = parser.parse("address:shanghai beijing AND NOT name:wangwu");    // AND NOT excludes matches; a leading "-" is the shorthand for NOT
                Query query = parser.parse("name:li*");    // prefix search
                TopDocCollector topdoc = new TopDocCollector(100);

                searcher.search(query, topdoc);
                ScoreDoc[] hits = topdoc.topDocs().scoreDocs;

                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    //System.out.println(hits[i].score);
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Release the searcher even when parsing or searching fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐