【Lucene3.6.2入门系列】第08节_高级搜索之自定义评分
2013-08-19 13:50
495 查看
package com.jadyer.lucene; import java.io.File; import java.io.IOException; import java.util.Random; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import com.jadyer.custom.MyNameScoreQuery; /** * 【Lucene3.6.2入门系列】第08节_高级搜索之自定义评分 * @create Aug 19, 2013 12:13:14 PM * @author 玄玉<http://blog.csdn.net/jadyer> */ public class AdvancedSearchByScore { private Directory directory; private IndexReader reader; public AdvancedSearchByScore(){ /**文件大小*/ int[] sizes = {90, 10, 20, 10, 60, 50}; /**文件名*/ String[] names = {"Michael.java", "Scofield.ini", "Tbag.txt", "Jack", "Jade", "Jadyer"}; /**文件内容*/ String[] contents = {"my java blog is http://blog.csdn.net/jadyer", "my Java Website is http://www.jadyer.cn", "my name is jadyer", "I am a Java Developer", "I am from Haerbin", "I like java of Lucene"}; IndexWriter writer = null; Document doc = null; try { directory = FSDirectory.open(new File("myExample/01_index/")); writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36))); writer.deleteAll(); for(int i=0; i<sizes.length; i++){ doc = new Document(); doc.add(new NumericField("size", Field.Store.YES, true).setIntValue(sizes[i])); doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED_NO_NORMS)); doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED)); //添加一个评分域,专门在自定义评分时使用 //此时默认为Field.Store.NO和Field.Index.ANALYZED_NO_NORMS doc.add(new NumericField("fileScore").setIntValue(new Random().nextInt(600))); writer.addDocument(doc); } } catch (Exception e) { e.printStackTrace(); } finally { if(null != writer){ try { writer.close(); } catch (IOException ce) { ce.printStackTrace(); } } } } /** * 获取IndexReader实例 */ private IndexReader getIndexReader(){ try { if(reader == null){ reader = IndexReader.open(directory); }else{ //if the index was changed since the provided reader was opened, open and return a new reader; else,return null //如果当前reader在打开期间index发生改变,则打开并返回一个新的IndexReader,否则返回null IndexReader ir = IndexReader.openIfChanged(reader); if(ir != null){ reader.close(); //关闭原reader reader = ir; //赋予新reader } } return reader; }catch(Exception e) { e.printStackTrace(); } return null; //发生异常则返回null } /** * 自定义评分搜索 */ public void searchByCustomScoreQuery(){ IndexSearcher searcher = new IndexSearcher(this.getIndexReader()); // //创建一个评分域 // FieldScoreQuery fsq = new FieldScoreQuery("fileScore", FieldScoreQuery.Type.INT); // //创建自定义的CustomScoreQuery对象 // Query query = new MyCustomScoreQuery(new TermQuery(new Term("content", "java")), fsq); Query query = new MyNameScoreQuery(new TermQuery(new Term("content", "java"))); try { TopDocs tds = searcher.search(query, 10); for(ScoreDoc sd : tds.scoreDocs){ Document doc = searcher.doc(sd.doc); System.out.print("文档编号=" + sd.doc + " 文档权值=" + doc.getBoost() + " 文档评分=" + sd.score + " "); System.out.println("size=" + doc.get("size") + " name=" + doc.get("name")); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if(searcher != null){ try { searcher.close(); } catch (IOException e) { e.printStackTrace(); } } } } /** * 测试一下评分效果 */ public static void main(String[] args) { new AdvancedSearchByScore().searchByCustomScoreQuery(); } }
下面是我们自定义的评分类MyCustomScoreQuery.java
package com.jadyer.custom; import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Query; import org.apache.lucene.search.function.CustomScoreProvider; import org.apache.lucene.search.function.CustomScoreQuery; import org.apache.lucene.search.function.ValueSourceQuery; /** * 自定义评分的步骤 * @see 1)创建一个类继承于CustomScoreQuery * @see 2)覆盖CustomScoreQuery.getCustomScoreProvider()方法 * @see 3)创建一个类继承于CustomScoreProvider * @see 4)覆盖CustomScoreProvider.customScore()方法(我们的自定义评分主要就是在此方法中完成的) * @create Aug 6, 2013 10:30:46 AM * @author 玄玉<http://blog.csdn.net/jadyer> */ public class MyCustomScoreQuery extends CustomScoreQuery { private static final long serialVersionUID = -2373017691291184609L; public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) { //ValueSourceQuery参数就是指专门用来做评分的Query,即评分域的FieldScoreQuery super(subQuery, valSrcQuery); } @Override protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException { //如果直接返回super的,就表示使用原有的评分规则,即通过[原有的评分*传入的评分域所获取的评分]来确定最终评分 //return super.getCustomScoreProvider(reader); return new MyCustomScoreProvider(reader); } private class MyCustomScoreProvider extends CustomScoreProvider { public MyCustomScoreProvider(IndexReader reader) { super(reader); } @Override public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException { //subQueryScore--表示默认文档的打分,valSrcScore--表示评分域的打分 //该方法的返回值就是文档评分,即ScoreDoc.score获取的结果 System.out.println("subQueryScore=" + subQueryScore + " valSrcScore=" + valSrcScore); return subQueryScore/valSrcScore; } } }
下面是自定义的采用特殊文件名作为评分标准的评分类MyNameScoreQuery.java
package com.jadyer.custom; import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.Query; import org.apache.lucene.search.function.CustomScoreProvider; import org.apache.lucene.search.function.CustomScoreQuery; /** * 采用特殊文件名作为评分标准 * @create Aug 6, 2013 2:17:13 PM * @author 玄玉<http://blog.csdn.net/jadyer> */ public class MyNameScoreQuery extends CustomScoreQuery { private static final long serialVersionUID = -2813985445544972520L; public MyNameScoreQuery(Query subQuery) { //由于这里是打算根据文件名来自定义评分,所以重写构造方法时不必传入评分域的ValueSourceQuery super(subQuery); } @Override protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException { return new FilenameScoreProvider(reader); } private class FilenameScoreProvider extends CustomScoreProvider { String[] filenames; public FilenameScoreProvider(IndexReader reader) { super(reader); try { //在IndexReader没有关闭之前,所有的数据都会存储到一个预缓存中(缺点是占用大量内存) //所以我们可以通过预缓存获取name域的值(获取到的是name域所有值,故使用数组) this.filenames = FieldCache.DEFAULT.getStrings(reader, "name"); } catch (IOException e) { e.printStackTrace(); } } @Override public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException { //由于FilenameScoreQuery构造方法没有传入ValueSourceQuery,故此处ValueSourceQuery默认为1.0 System.out.println("subQueryScore=" + subQueryScore + " valSrcScore=" + valSrcScore); if(filenames[doc].endsWith(".java") || filenames[doc].endsWith(".ini")){ //只加大java文件和ini文件的评分 return subQueryScore*1.5f; }else{ return subQueryScore/1.5f; } } } }
相关文章推荐
- 【Lucene3.6.2入门系列】第08节_高级搜索之自定义评分
- 【Lucene3.6.2入门系列】第09节_高级搜索之自定义QueryParser
- 【Lucene3.6.2入门系列】第07节_高级搜索之普通Filter和自定义Filter
- Lucene 3.6.2入门:高级搜索之自定义评分
- 【Lucene3.6.2入门系列】第07节_高级搜索之普通Filter和自定义Filter
- Lucene 3.6.2入门系列:高级搜索之自定义QueryParser
- 【Lucene3.6.2入门系列】第09节_高级搜索之自定义QueryParser
- 【Lucene3.6.2入门系列】第06节_高级搜索之排序
- 【Lucene3.6.2入门系列】第06节_高级搜索之排序
- Lucene 3.6.2入门:高级搜索之普通Filter和自定义Filter
- 【Lucene3.6.2入门系列】第12节_近实时搜索
- 【Lucene3.6.2入门系列】第03节_简述Lucene中常见的搜索功能
- 【Lucene3.6.2入门系列】第03节_简述Lucene中常见的搜索功能
- 【Lucene3.6.2入门系列】第03节_简述Lucene中常见的搜索功能
- Lucene 3.6.2入门:高级搜索之排序
- 【Lucene3.6.2入门系列】第05节_自定义停用词分词器和同义词分词器
- 【Lucene3.6.2入门系列】第14节_SolrJ操作索引和搜索文档以及整合中文分词
- 【Lucene3.6.2入门系列】第05节_自定义停用词分词器和同义词分词器
- 【Lucene3.6.2入门系列】第12节_近实时搜索
- 【Lucene3.6.2入门系列】第05节_自定义停用词分词器和同义词分词器