自己动手写搜索引擎(常搜吧历程三#搜索二#)(Java、Lucene、hadoop)
2013-03-17 21:45
609 查看
接着上一节搜索一,我们来深入学习Lucene下搜索的其他功能。
Lucene分页搜索的实现:
搜索中使用RamDirectory:
QueryParser的使用:
1、默认字段的检索
2、在指定字段中检索
3、在默认字段进行“或”检索
4、对默认字段进行“与”检索
5、AND NOT 的检索
6、默认字段中前缀检索
7、对短语进行检索
Lucene分页搜索的实现:
package com.qianyan.lucene; import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class TestSeacher3 { public static void main(String[] args) throws IOException { String indexDir = "E:/luceneindex"; Directory dir = FSDirectory.getDirectory(indexDir); IndexSearcher searcher = new IndexSearcher(dir); ScoreDoc[] hits = null; Term term = new Term("contents", "ontology"); TermQuery query = new TermQuery(term); TopDocs topDocs = searcher.search(query, 126); int eachPageNumber = 10; //每页显示记录数 int pageNumber = 3; //当前页 hits = topDocs.scoreDocs; for(int i = (pageNumber - 1) * eachPageNumber; i < pageNumber * eachPageNumber; i++){ Document doc = searcher.doc(hits[i].doc); System.out.print(hits[i].score); System.out.println(doc.get("contents")); } searcher.close(); dir.close(); } }
搜索中使用RamDirectory:
package com.qianyan.lucene; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; public class TestRamSearch { Directory directory = new RAMDirectory(); public void createRamIndex() throws IOException{ String[] ids = {"1", "2", "3", "4"}; String[] names = {"zhangsan", "lisi", "wangwu", "zhaoliu"}; String[] addresses = {"shanghai", "beijing", "guangzhou", "nanjing"}; String[] birthdays = {"19820720", "19840203", "19770409", "19830130"}; Analyzer analyzer = new StandardAnalyzer(); IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); for(int i = 0; i < ids.length; i++){ Document document = new Document(); document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("address", addresses[i], Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("birthday", birthdays[i], Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(document); } writer.optimize(); writer.close(); } public void searchRam() throws IOException{ IndexSearcher searcher = new IndexSearcher(directory); ScoreDoc[] hits = null; Term term = new Term("name", "zhangsan"); TermQuery query = new TermQuery(term); TopDocs topDocs = searcher.search(query, 126); hits = topDocs.scoreDocs; for(int i = 0; i < hits.length; i++){ Document doc = searcher.doc(hits[i].doc); //System.out.println(hits[i].score); System.out.print(doc.get("id") + " "); System.out.print(doc.get("name") + " "); System.out.print(doc.get("address") + " "); System.out.println(doc.get("birthday") + " "); } searcher.close(); directory.close(); } public static void main(String[] args) throws IOException { TestRamSearch trs = new TestRamSearch(); trs.createRamIndex(); trs.searchRam(); } }
QueryParser的使用:
1、默认字段的检索
2、在指定字段中检索
3、在默认字段进行“或”检索
4、对默认字段进行“与”检索
5、AND NOT 的检索
6、默认字段中前缀检索
7、对短语进行检索
package com.qianyan.lucene; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class TestQueryParser { public static void main(String[] args) throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(); String indexDir = "E:/luceneindex"; Directory dir = FSDirectory.getDirectory(indexDir); IndexSearcher searcher = new IndexSearcher(dir); ScoreDoc[] hits = null; QueryParser parser = new QueryParser("address", analyzer); //name为默认字段检索 //Query query = parser.parse("address:resides in"); //支持短语搜索 //Query query = parser.parse("birthday:[19820720 TO 19840203]"); //中括号包含首尾,花括号不包含。TO指范围 //Query query = parser.parse("zhangsan~"); //前缀检索 //Query query = parser.parse("shanghai beijing"); //"或"形式有三种:1、空格:shanghai beijing 3、or关键字:shanghai or beijing //Query query = parser.parse("shanghai and beijing"); //"与"形式有三种:1、+号:shanghai beijing 2、and关键字:shanghai and beijing //Query query = parser.parse("address:shanghai beijing AND NOT name:wangwu"); //AND NOT 不满足 ; "-"号也等于 AND NOT Query query = parser.parse("name:li*"); //前缀检索 TopDocCollector topdoc = new TopDocCollector(100); searcher.search(query, topdoc); hits = topdoc.topDocs().scoreDocs; for(int i = 0; i < hits.length; i++){ Document doc = searcher.doc(hits[i].doc); //System.out.println(hits[i].score); System.out.print(doc.get("id") + " "); System.out.print(doc.get("name") + " "); System.out.print(doc.get("address") + " "); System.out.println(doc.get("birthday") + " "); } searcher.close(); dir.close(); } }
相关文章推荐
- 自己动手写搜索引擎(常搜吧历程三#搜索#)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧历程二#索引#)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧历程五#解析文档之XML#)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧历程七#解析文档之HTML#)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧历程一#认识Lucene#)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧历程四#分词#)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧历程七#解析文档之WORD#)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧历程四#分词#)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧历程六#解析文档之PDF#)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧项目展示)(Java、Lucene、hadoop)
- 自己动手写搜索引擎(常搜吧项目展示)(Java、Lucene、hadoop)
- 自己动手搭建搜索引擎(基于Nutch1.0) lucene 很简单,已成功
- 搜索引擎开发,垂直搜索开发探讨:蜘蛛,并行,搜索,垂直搜索,搜索开发,lucene,java,分布[原创]
- [导入]自己动手实现 lucene 搜索代码高亮显示
- 自己动手写搜索引擎之java爬虫
- 《自己动手建搜索引擎》日志分析类代码解析与修正为兼容lucene3.0.2
- 自己动手写搜索引擎之Lucene
- 自己动手实现 lucene 搜索代码高亮显示
- 全文检索技术 lucene(二) 自己动手写一个搜索引擎
- Hadoop学习之自己动手做搜索引擎【网络爬虫+倒排索引+中文分词】