您的位置:首页 > 其它

lucene优先

2017-12-04 15:08 23 查看
Lucene 段合并参考资料: http://www.blogjava.net/tim-wu/archive/2008/02/06/179380.html http://forfuture1978.iteye.com/blog/609197 http://blog.csdn.net/i_mc_90/article/details/7778884
http://blog.itpub.net/28624388/viewspace-767812/ http://blog.csdn.net/lilinhai548/article/details/8570315
1 测试 MergePolicy
测试代码见MergePolicy.java
package com.ctrip.search.gsrest.test;

import java.io.File;

import java.util.List;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.LogDocMergePolicy;

import org.apache.lucene.index.LogMergePolicy;

import org.apache.lucene.index.MergePolicy.MergeSpecification;

import org.apache.lucene.index.MergePolicy.OneMerge;

import org.apache.lucene.index.SegmentInfo;

import org.apache.lucene.index.SegmentInfos;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

import com.ctrip.search.engine.index.IndexSimilarity;

import com.ctrip.search.schema.Schema;

import com.ctrip.search.schema.SchemaFactory;

import com.ctrip.search.util.IndexName;

/**
 * Demo: lists the segments of an existing on-disk index and prints which
 * segments a {@link LogDocMergePolicy} configured with merge factor 3 would
 * group into each merge.
 *
 * <p>The run is meant to be a pure inspection: nothing is added to the index,
 * and the writer is rolled back (not committed) before the program exits.
 */
public class MergePolicy {

    /**
     * Entry point. Reads the segment infos from the hard-coded index directory,
     * prints one entry per segment, then prints every merge proposed by the
     * merge policy together with the segments it contains.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Directory dir = null;
        IndexWriter writer = null;
        try {
            IndexName.set("gsrest");
            Schema schema = SchemaFactory.get().get("gsrest");

            // 1. MergePolicy configuration
            IndexWriterConfig indexWriterConfig =
                    new IndexWriterConfig(Version.LUCENE_34, schema.getIndexAnalyzer());
            // Alternative policy to experiment with: new LogByteSizeMergePolicy()
            LogMergePolicy mergePolicy = new LogDocMergePolicy();
            mergePolicy.setMergeFactor(3);
            indexWriterConfig.setMergePolicy(mergePolicy);
            indexWriterConfig.setSimilarity(new IndexSimilarity());

            // 2. Segment infos of the current commit
            dir = FSDirectory.open(new File("D:\\temp\\realtime1\\realtime"));
            SegmentInfos infos = new SegmentInfos();
            infos.read(dir);

            System.out.println("SegmentInfos----------------------");
            System.out.println("info Seg Count:" + infos.size());
            for (int i = 0; i < infos.size(); i++) {
                SegmentInfo info = infos.info(i);
                System.out.println("****************** segment [" + i + "]");
                System.out.println("segment name:" + info.name);
                System.out.println("the doc count in segment:" + info.docCount);
            }

            // 3. List<OneMerge> proposed by the policy
            // NOTE(review): the writer is not used directly below; presumably it
            // must be constructed so the merge policy is bound to a writer before
            // findMerges is called -- confirm against the Lucene 3.4 sources.
            writer = new IndexWriter(dir, indexWriterConfig);

            System.out.println("List<OneMerge>----------------------");
            MergeSpecification mergeSpecification = mergePolicy.findMerges(infos);
            if (mergeSpecification == null) {
                // The policy yields no specification when it selects no merge;
                // dereferencing it unconditionally would throw a NullPointerException.
                System.out.println("no merges selected");
                return;
            }

            List<OneMerge> merges = mergeSpecification.merges;
            for (OneMerge oneMerge : merges) {
                System.out.println("OneMergeCount:" + oneMerge.totalNumDocs() + "-----------");
                List<SegmentInfo> segments = oneMerge.segments;
                for (SegmentInfo segmentInfo : segments) {
                    System.out.println(segmentInfo.name + ":" + segmentInfo.docCount);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the write lock and file handles even when inspection fails.
            if (writer != null) {
                try {
                    // rollback() closes the writer without committing, which keeps
                    // this inspection-only run from altering the index.
                    writer.rollback();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (dir != null) {
                try {
                    dir.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

测试结果:
SegmentInfos----------------------

info Seg Count:12

****************** segment [0]

segment name:_v

the doc count in segment:21128

****************** segment [1]

segment name:_n

the doc count in segment:49

****************** segment [2]

segment name:_w

the doc count in segment:667

****************** segment [3]

segment name:_x

the doc count in segment:1000

****************** segment [4]

segment name:_y

the doc count in segment:461

****************** segment [5]

segment name:_z

the doc count in segment:2000

****************** segment [6]

segment name:_10

the doc count in segment:2764

****************** segment [7]

segment name:_11

the doc count in segment:2236

****************** segment [8]

segment name:_12

the doc count in segment:2995

****************** segment [9]

segment name:_13

the doc count in segment:2343

****************** segment [10]

segment name:_14

the doc count in segment:2314

****************** segment [11]

segment name:_15

the doc count in segment:397

List<OneMerge>----------------------

OneMergeCount:1716-----------

_n:49

_w:667

_x:1000

OneMergeCount:5225-----------

_y:461

_z:2000

_10:2764

OneMergeCount:7574-----------

_11:2236

_12:2995

_13:2343

2 测试 optimize
测试代码见TestOptimize.java

package com.ctrip.search.gsrest.test;

import java.io.File;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.SegmentInfos;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.store.RAMDirectory;

import org.apache.lucene.util.Version;

import com.ctrip.search.engine.index.IndexSimilarity;

import com.ctrip.search.schema.Schema;

import com.ctrip.search.schema.SchemaFactory;

import com.ctrip.search.util.IndexName;

public class TestOptimize {

public static void main(String[] args) {

try {

IndexName.set("gsrest");

Schema schema = SchemaFactory.get().get("gsrest");

IndexWriterConfig indexWriterConfig =new IndexWriterConfig(Version.LUCENE_34, schema.getIndexAnalyzer());

indexWriterConfig.setSimilarity(new IndexSimilarity());

Directory dir = new RAMDirectory(FSDirectory.open(new File("D:\\temp\\realtime1\\realtime")));

//Directory dir = FSDirectory.open(new File("D:\\temp\\realtime3\\realtime"));

IndexWriter writer = new IndexWriter(dir, indexWriterConfig);

a36b
SegmentInfos segmentInfos = new SegmentInfos();

segmentInfos.read(writer.getDirectory());

if (segmentInfos!=null) {

System.out.println("segmentInfos size = "+segmentInfos.size());

}

writer.optimize();

writer.commit();

segmentInfos.read(writer.getDirectory());

if (segmentInfos!=null) {

System.out.println("segmentInfos size = "+segmentInfos.size());

}

} catch (Exception e) {

e.printStackTrace();

}

}

}

测试结果:
segmentInfos size = 12

segmentInfos size = 1
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  lucene 合并