lucene优先
2017-12-04 15:08
23 查看
lucene段合并 http://www.blogjava.net/tim-wu/archive/2008/02/06/179380.html http://forfuture1978.iteye.com/blog/609197 http://blog.csdn.net/i_mc_90/article/details/7778884
http://blog.itpub.net/28624388/viewspace-767812/ http://blog.csdn.net/lilinhai548/article/details/8570315
1,测试 MergePolicy
测试代码见MergePolicy.java
package com.ctrip.search.gsrest.test;
import java.io.File;
import java.util.List;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy.MergeSpecification;
import org.apache.lucene.index.MergePolicy.OneMerge;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.ctrip.search.engine.index.IndexSimilarity;
import com.ctrip.search.schema.Schema;
import com.ctrip.search.schema.SchemaFactory;
import com.ctrip.search.util.IndexName;
/**
 * Inspects an existing on-disk index and prints (1) every segment's name and
 * doc count, then (2) the merges a LogDocMergePolicy with mergeFactor=3 would
 * schedule for it. Read-only experiment: no merge is actually executed.
 */
public class MergePolicy {
    public static void main(String[] args) {
        IndexWriter writer = null;
        try {
            IndexName.set("gsrest");
            Schema schema = SchemaFactory.get().get("gsrest");
            // 1. Configure the merge policy (LogByteSizeMergePolicy would
            // group by segment byte size instead of doc count).
            IndexWriterConfig indexWriterConfig =
                    new IndexWriterConfig(Version.LUCENE_34, schema.getIndexAnalyzer());
            LogMergePolicy mergePolicy = new LogDocMergePolicy();
            mergePolicy.setMergeFactor(3); // merge whenever 3 segments share a size level
            indexWriterConfig.setMergePolicy(mergePolicy);
            indexWriterConfig.setSimilarity(new IndexSimilarity());
            // 2. Read the segment metadata of the existing index.
            Directory dir = FSDirectory.open(new File("D:\\temp\\realtime1\\realtime"));
            SegmentInfos infos = new SegmentInfos();
            infos.read(dir);
            System.out.println("SegmentInfos----------------------");
            System.out.println("info Seg Count:" + infos.size());
            for (int i = 0; i < infos.size(); i++) {
                SegmentInfo info = infos.info(i);
                System.out.println("****************** segment [" + i + "]");
                System.out.println("segment name:" + info.name);
                System.out.println("the doc count in segment:" + info.docCount);
            }
            // 3. Ask the policy which merges it would run. Constructing the
            // writer binds it to the merge policy — findMerges needs that.
            writer = new IndexWriter(dir, indexWriterConfig);
            System.out.println("List<OneMerge>----------------------");
            MergeSpecification mergeSpecification = mergePolicy.findMerges(infos);
            if (mergeSpecification != null) { // null when no merges are pending
                List<OneMerge> merges = mergeSpecification.merges;
                for (OneMerge oneMerge : merges) {
                    System.out.println("OneMergeCount:" + oneMerge.totalNumDocs() + "-----------");
                    List<SegmentInfo> segments = oneMerge.segments;
                    for (SegmentInfo segmentInfo : segments) {
                        System.out.println(segmentInfo.name + ":" + segmentInfo.docCount);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the index write lock; leaking it blocks later writers.
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // best-effort cleanup in a throwaway test program
                }
            }
        }
    }
}
测试结果:
SegmentInfos----------------------
info Seg Count:12
****************** segment [0]
segment name:_v
the doc count in segment:21128
****************** segment [1]
segment name:_n
the doc count in segment:49
****************** segment [2]
segment name:_w
the doc count in segment:667
****************** segment [3]
segment name:_x
the doc count in segment:1000
****************** segment [4]
segment name:_y
the doc count in segment:461
****************** segment [5]
segment name:_z
the doc count in segment:2000
****************** segment [6]
segment name:_10
the doc count in segment:2764
****************** segment [7]
segment name:_11
the doc count in segment:2236
****************** segment [8]
segment name:_12
the doc count in segment:2995
****************** segment [9]
segment name:_13
the doc count in segment:2343
****************** segment [10]
segment name:_14
the doc count in segment:2314
****************** segment [11]
segment name:_15
the doc count in segment:397
List<OneMerge>----------------------
OneMergeCount:1716-----------
_n:49
_w:667
_x:1000
OneMergeCount:5225-----------
_y:461
_z:2000
_10:2764
OneMergeCount:7574-----------
_11:2236
_12:2995
_13:2343
2 测试optimize
测试代码见TestOptimize.java
package com.ctrip.search.gsrest.test;
import java.io.File;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import com.ctrip.search.engine.index.IndexSimilarity;
import com.ctrip.search.schema.Schema;
import com.ctrip.search.schema.SchemaFactory;
import com.ctrip.search.util.IndexName;
public class TestOptimize {
public static void main(String[] args) {
try {
IndexName.set("gsrest");
Schema schema = SchemaFactory.get().get("gsrest");
IndexWriterConfig indexWriterConfig =new IndexWriterConfig(Version.LUCENE_34, schema.getIndexAnalyzer());
indexWriterConfig.setSimilarity(new IndexSimilarity());
Directory dir = new RAMDirectory(FSDirectory.open(new File("D:\\temp\\realtime1\\realtime")));
//Directory dir = FSDirectory.open(new File("D:\\temp\\realtime3\\realtime"));
IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
a36b
SegmentInfos segmentInfos = new SegmentInfos();
segmentInfos.read(writer.getDirectory());
if (segmentInfos!=null) {
System.out.println("segmentInfos size = "+segmentInfos.size());
}
writer.optimize();
writer.commit();
segmentInfos.read(writer.getDirectory());
if (segmentInfos!=null) {
System.out.println("segmentInfos size = "+segmentInfos.size());
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
测试结果:
segmentInfos size = 12
segmentInfos size = 1
http://blog.itpub.net/28624388/viewspace-767812/ http://blog.csdn.net/lilinhai548/article/details/8570315
1,测试 MergePolicy
测试代码见MergePolicy.java
package com.ctrip.search.gsrest.test;
import java.io.File;
import java.util.List;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy.MergeSpecification;
import org.apache.lucene.index.MergePolicy.OneMerge;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.ctrip.search.engine.index.IndexSimilarity;
import com.ctrip.search.schema.Schema;
import com.ctrip.search.schema.SchemaFactory;
import com.ctrip.search.util.IndexName;
/**
 * Inspects an existing on-disk index and prints (1) every segment's name and
 * doc count, then (2) the merges a LogDocMergePolicy with mergeFactor=3 would
 * schedule for it. Read-only experiment: no merge is actually executed.
 */
public class MergePolicy {
    public static void main(String[] args) {
        IndexWriter writer = null;
        try {
            IndexName.set("gsrest");
            Schema schema = SchemaFactory.get().get("gsrest");
            // 1. Configure the merge policy (LogByteSizeMergePolicy would
            // group by segment byte size instead of doc count).
            IndexWriterConfig indexWriterConfig =
                    new IndexWriterConfig(Version.LUCENE_34, schema.getIndexAnalyzer());
            LogMergePolicy mergePolicy = new LogDocMergePolicy();
            mergePolicy.setMergeFactor(3); // merge whenever 3 segments share a size level
            indexWriterConfig.setMergePolicy(mergePolicy);
            indexWriterConfig.setSimilarity(new IndexSimilarity());
            // 2. Read the segment metadata of the existing index.
            Directory dir = FSDirectory.open(new File("D:\\temp\\realtime1\\realtime"));
            SegmentInfos infos = new SegmentInfos();
            infos.read(dir);
            System.out.println("SegmentInfos----------------------");
            System.out.println("info Seg Count:" + infos.size());
            for (int i = 0; i < infos.size(); i++) {
                SegmentInfo info = infos.info(i);
                System.out.println("****************** segment [" + i + "]");
                System.out.println("segment name:" + info.name);
                System.out.println("the doc count in segment:" + info.docCount);
            }
            // 3. Ask the policy which merges it would run. Constructing the
            // writer binds it to the merge policy — findMerges needs that.
            writer = new IndexWriter(dir, indexWriterConfig);
            System.out.println("List<OneMerge>----------------------");
            MergeSpecification mergeSpecification = mergePolicy.findMerges(infos);
            if (mergeSpecification != null) { // null when no merges are pending
                List<OneMerge> merges = mergeSpecification.merges;
                for (OneMerge oneMerge : merges) {
                    System.out.println("OneMergeCount:" + oneMerge.totalNumDocs() + "-----------");
                    List<SegmentInfo> segments = oneMerge.segments;
                    for (SegmentInfo segmentInfo : segments) {
                        System.out.println(segmentInfo.name + ":" + segmentInfo.docCount);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the index write lock; leaking it blocks later writers.
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // best-effort cleanup in a throwaway test program
                }
            }
        }
    }
}
测试结果:
SegmentInfos----------------------
info Seg Count:12
****************** segment [0]
segment name:_v
the doc count in segment:21128
****************** segment [1]
segment name:_n
the doc count in segment:49
****************** segment [2]
segment name:_w
the doc count in segment:667
****************** segment [3]
segment name:_x
the doc count in segment:1000
****************** segment [4]
segment name:_y
the doc count in segment:461
****************** segment [5]
segment name:_z
the doc count in segment:2000
****************** segment [6]
segment name:_10
the doc count in segment:2764
****************** segment [7]
segment name:_11
the doc count in segment:2236
****************** segment [8]
segment name:_12
the doc count in segment:2995
****************** segment [9]
segment name:_13
the doc count in segment:2343
****************** segment [10]
segment name:_14
the doc count in segment:2314
****************** segment [11]
segment name:_15
the doc count in segment:397
List<OneMerge>----------------------
OneMergeCount:1716-----------
_n:49
_w:667
_x:1000
OneMergeCount:5225-----------
_y:461
_z:2000
_10:2764
OneMergeCount:7574-----------
_11:2236
_12:2995
_13:2343
2 测试optimize
测试代码见TestOptimize.java
package com.ctrip.search.gsrest.test;
import java.io.File;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import com.ctrip.search.engine.index.IndexSimilarity;
import com.ctrip.search.schema.Schema;
import com.ctrip.search.schema.SchemaFactory;
import com.ctrip.search.util.IndexName;
public class TestOptimize {
public static void main(String[] args) {
try {
IndexName.set("gsrest");
Schema schema = SchemaFactory.get().get("gsrest");
IndexWriterConfig indexWriterConfig =new IndexWriterConfig(Version.LUCENE_34, schema.getIndexAnalyzer());
indexWriterConfig.setSimilarity(new IndexSimilarity());
Directory dir = new RAMDirectory(FSDirectory.open(new File("D:\\temp\\realtime1\\realtime")));
//Directory dir = FSDirectory.open(new File("D:\\temp\\realtime3\\realtime"));
IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
a36b
SegmentInfos segmentInfos = new SegmentInfos();
segmentInfos.read(writer.getDirectory());
if (segmentInfos!=null) {
System.out.println("segmentInfos size = "+segmentInfos.size());
}
writer.optimize();
writer.commit();
segmentInfos.read(writer.getDirectory());
if (segmentInfos!=null) {
System.out.println("segmentInfos size = "+segmentInfos.size());
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
测试结果:
segmentInfos size = 12
segmentInfos size = 1
相关文章推荐
- lucene中用到的优先队列
- 传智播客-关于lucene的“知识集锦”和字段优先设置问题
- 【算法入门】广度/宽度优先搜索(BFS)
- 22th【贪心&&优先队列】打地鼠游戏
- 实战 Lucene,第 1 部分: 初识 Lucene
- lucene hello world
- 全文检索 lucene 4.7以上的版本只支持jdk1.7以上的 不支持jdk1.6版本
- 全文检索技术 lucene(一) 一个简单的Demo
- 基于Java的全文索引引擎Lucene简介
- poj 3253 Fence Repair【优先队列,堆】
- Html A标签中 href 和 onclick 同时使用的问题 优先级别
- BFS 八数码问题 typedef int State[9]; (BFS A*算法与优先队列)
- Lucene中的基本概念
- 使用Lucene和IKAnalyzer实现 中文简单 分词
- lucene4.0入门实例
- Lucene5学习之TermQuery使用
- 在校大学生以学业为重还是创业优先?
- lucene使用教程4 --常用类的对象之IndexSearcher
- css3布局-左右两栏固定宽度,中间栏优先加载
- Lucene5学习之CustomScoreQuery