您的位置:首页 > 其它

二、lucene2.4的增删改查及其查询语法

2013-04-12 11:13 423 查看
1、工程结构图



2、排序的几种方式


0

3、增删改查索引

package cn.hj.lucene.dao;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

/**
 * Data-access object for a file-system Lucene index: adds, deletes and updates
 * documents, and runs paged searches whose "content" field is replaced by a
 * highlighted summary.
 */
public class IndexDao {

    /** Maximum number of hits requested from Lucene for a single search. */
    private static final int MAX_HITS = 10000;

    /** Fragment size for the highlighter and length of the fallback summary. */
    private static final int SUMMARY_LENGTH = 50;

    /** Directory that holds the index files. */
    String indexPath = "D:\\Workspaces\\lucenedemo\\luceneIndex";

    /**
     * Analyzer used for both indexing and query parsing; MMAnalyzer segments
     * words using a dictionary (StandardAnalyzer was the alternative tried).
     */
    Analyzer analyzer = new MMAnalyzer();

    /**
     * Adds a document to the index.
     *
     * @param doc the document to add
     * @throws RuntimeException wrapping any failure while opening or writing the index
     */
    public void save(Document doc) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
            indexWriter.addDocument(doc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Deletes every document that contains the given term.
     *
     * <p>A Term is the smallest unit of search and stands for one keyword in
     * one field, e.g. {@code new Term("title", "lucene")},
     * {@code new Term("id", "5")} or {@code new Term("id", UUID)}.
     *
     * @param term the term identifying the documents to delete
     * @throws RuntimeException wrapping any failure while opening or writing the index
     */
    public void delete(Term term) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
            indexWriter.deleteDocuments(term);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Updates the index by replacing the documents that match {@code term}
     * with {@code doc}. Equivalent to:
     *
     * <pre>
     * indexWriter.deleteDocuments(term);
     * indexWriter.addDocument(doc);
     * </pre>
     *
     * @param term the term identifying the documents to replace
     * @param doc  the replacement document
     * @throws RuntimeException wrapping any failure while opening or writing the index
     */
    public void update(Term term, Document doc) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
            indexWriter.updateDocument(term, doc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Parses a query string against the "name" and "content" fields and
     * returns one page of results.
     *
     * <p>Callers can derive the page count from the returned record count:
     *
     * <pre>
     * totalPage = recordCount / pageSize;
     * if (recordCount % pageSize > 0)
     * 	totalPage++;
     * </pre>
     *
     * @param queryString the text to search for, in Lucene query syntax
     * @param firstResult zero-based index of the first hit to return
     * @param maxResults  maximum number of hits to return
     * @return the total match count plus the documents of the requested page
     * @throws RuntimeException wrapping any parse or search failure
     */
    public QueryResult search(String queryString, int firstResult, int maxResults) {
        try {
            // Step 1: turn the text into a Query.
            String[] fields = { "name", "content" };
            Map<String, Float> boosts = new HashMap<String, Float>();
            // A larger number means a higher weight; "content" keeps the default of 1.0f.
            boosts.put("name", 3f);

            QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer, boosts);
            Query query = queryParser.parse(queryString);

            return search(query, firstResult, maxResults);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs a query and returns one page of results with highlighted content.
     *
     * <p>NOTE: every search is restricted by a hard-coded filter to documents
     * whose "size" lies in [200, 1000] (both bounds inclusive) and is sorted
     * by "size". At most {@value #MAX_HITS} hits are fetched, so pages beyond
     * that limit are empty even when the reported record count is larger.
     *
     * @param query       the query to execute
     * @param firstResult zero-based index of the first hit to return
     * @param maxResults  maximum number of hits to return
     * @return the total match count plus the documents of the requested page
     * @throws RuntimeException wrapping any search failure
     */
    public QueryResult search(Query query, int firstResult, int maxResults) {
        IndexSearcher indexSearcher = null;

        try {
            // Step 2: execute the query.
            indexSearcher = new IndexSearcher(indexPath);
            Filter filter = new RangeFilter("size", NumberTools.longToString(200),
                    NumberTools.longToString(1000), true, true);

            // Sort by "size"; use new SortField("size", true) for the reverse order.
            Sort sort = new Sort();
            sort.setSort(new SortField("size"));

            TopDocs topDocs = indexSearcher.search(query, filter, MAX_HITS, sort);

            int recordCount = topDocs.totalHits;
            List<Document> recordList = new ArrayList<Document>();

            Highlighter highlighter = createHighlighter(query);

            // Step 3: collect the requested page. The bound is the number of
            // hits actually fetched (not totalHits, which may exceed MAX_HITS),
            // and the sum is done in long so a huge maxResults cannot overflow.
            int end = (int) Math.min((long) firstResult + maxResults, topDocs.scoreDocs.length);
            for (int i = firstResult; i < end; i++) {
                ScoreDoc scoreDoc = topDocs.scoreDocs[i];
                // scoreDoc.doc is Lucene's internal document number.
                Document doc = indexSearcher.doc(scoreDoc.doc);
                highlightContent(highlighter, doc);
                recordList.add(doc);
            }

            return new QueryResult(recordCount, recordList);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(indexSearcher);
        }
    }

    /** Builds a highlighter that wraps matched keywords in a red font tag. */
    private Highlighter createHighlighter(Query query) {
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);

        Fragmenter fragmenter = new SimpleFragmenter(SUMMARY_LENGTH);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    /**
     * Replaces the document's "content" value with its best highlighted
     * fragment, or with its first SUMMARY_LENGTH characters when the keyword
     * does not occur in the content. Documents without a stored "content"
     * value are left untouched.
     */
    private void highlightContent(Highlighter highlighter, Document doc) throws IOException {
        String content = doc.get("content");
        if (content == null) {
            return;
        }
        // getBestFragment returns null when the text contains no keyword.
        String summary = highlighter.getBestFragment(analyzer, "content", content);
        if (summary == null) {
            summary = content.substring(0, Math.min(SUMMARY_LENGTH, content.length()));
        }
        doc.getField("content").setValue(summary);
    }

    /** Closes the writer if it was opened; a close failure is only printed. */
    private static void closeQuietly(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return; // the constructor failed, nothing to close
        }
        try {
            indexWriter.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Closes the searcher if it was opened; a close failure is only printed. */
    private static void closeQuietly(IndexSearcher indexSearcher) {
        if (indexSearcher == null) {
            return; // the constructor failed, nothing to close
        }
        try {
            indexSearcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


4、增删改查的测试

package cn.hj.lucene.dao;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.junit.Test;
import cn.hj.lucene.utils.File2DocumentUtils;
/**
 * JUnit exercises for the create, delete, update and search operations of
 * {@link IndexDao}, using two sample text files as the data source.
 */
public class IndexDaoTest {
    /** First sample file (English javadoc text). */
    String filePath = "D:\\Workspaces\\lucenedemo\\luceneDatasource\\IndexWriter addDocument's a javadoc .txt";
    /** Second sample file (a short Chinese joke). */
    String filePath2 = "D:\\Workspaces\\lucenedemo\\luceneDatasource\\小笑话_总统的房间 Room .txt";

    IndexDao indexDao = new IndexDao();

    /** Indexes both sample files; the first one gets a document boost of 3. */
    @Test
    public void testSave() {
        Document boosted = File2DocumentUtils.file2Document(filePath);
        boosted.setBoost(3f);
        indexDao.save(boosted);

        // The second document keeps the default boost (1.0f).
        indexDao.save(File2DocumentUtils.file2Document(filePath2));
    }

    /** Deletes the documents whose "path" term equals the first sample file. */
    @Test
    public void testDelete() {
        indexDao.delete(new Term("path", filePath));
    }

    /** Replaces the first sample file's document with one carrying new content. */
    @Test
    public void testUpdate() {
        Document replacement = File2DocumentUtils.file2Document(filePath);
        replacement.getField("content").setValue("这是更新后的文件内容");

        indexDao.update(new Term("path", filePath), replacement);
    }

    /**
     * Searches for a keyword and prints the first ten hits. Other query
     * strings worth trying: "IndexWriter", "房间", "笑话", "content:绅士".
     */
    @Test
    public void testSearch() {
        String queryString = "room";
        QueryResult result = indexDao.search(queryString, 0, 10);

        System.out.println("总共有【" + result.getRecordCount() + "】条匹配结果");
        for (Document hit : result.getRecordList()) {
            File2DocumentUtils.printDocument(hit);
        }
    }

}


5、分页时所用到的bean

package cn.hj.lucene.dao;

import java.util.List;
import org.apache.lucene.document.Document;

/**
 * Holder for one page of search results: the total number of matches and the
 * documents that belong to the requested page.
 */
public class QueryResult {
    /** Total number of matching records. */
    private int recordCount;
    /** Documents of the current page. */
    private List<Document> recordList;

    /**
     * @param recordCount total number of matching records
     * @param recordList  documents of the current page
     */
    public QueryResult(int recordCount, List<Document> recordList) {
        this.recordList = recordList;
        this.recordCount = recordCount;
    }

    /** @return total number of matching records */
    public int getRecordCount() {
        return this.recordCount;
    }

    /** @param recordCount total number of matching records */
    public void setRecordCount(int recordCount) {
        this.recordCount = recordCount;
    }

    /** @return documents of the current page */
    public List<Document> getRecordList() {
        return this.recordList;
    }

    /** @param recordList documents of the current page */
    public void setRecordList(List<Document> recordList) {
        this.recordList = recordList;
    }

}


6、lucene的查询语法

package cn.hj.lucene.query;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.junit.Test;
import cn.hj.lucene.dao.IndexDao;
import cn.hj.lucene.dao.QueryResult;
import cn.hj.lucene.utils.File2DocumentUtils;

/**
 * JUnit demonstrations of the Lucene query types, each paired with the
 * equivalent query-string syntax in its documentation.
 */
public class QueryTest {
    IndexDao indexDao = new IndexDao();

    /**
     * Runs the query and prints up to the first 100 hits.
     *
     * @param query the query to execute
     */
    private void queryAndPrintResult(Query query) {
        printResult(indexDao.search(query, 0, 100));
    }

    /** Prints the match count followed by every document of the page. */
    private void printResult(QueryResult result) {
        System.out.println("总共有【" + result.getRecordCount() + "】条匹配结果");
        for (Document hit : result.getRecordList()) {
            File2DocumentUtils.printDocument(hit);
        }
    }

    /**
     * Keyword (term) query.
     */
    @Test
    public void testTermQuery() {
        queryAndPrintResult(new TermQuery(new Term("name", "room")));
    }

    /**
     * Range query. Values must have the same width at index time and at
     * search time.
     *
     * Bounds included: size:[0000000000001e TO 000000000000rs]
     *
     * Bounds excluded: size:{0000000000001e TO 000000000000rs}
     */
    @Test
    public void testRangeQuery() {
        Term lower = new Term("size", NumberTools.longToString(50));
        Term upper = new Term("size", NumberTools.longToString(1000));

        queryAndPrintResult(new RangeQuery(lower, upper, false));
    }

    // Scratch code for inspecting the encoded forms used above:
    // public static void main(String[] args) {
    // System.out.println(Long.MAX_VALUE);
    // System.out.println(NumberTools.longToString(1000));
    // System.out.println(NumberTools.stringToLong("000000000000rs"));
    //
    // System.out.println(DateTools.dateToString(new Date(), Resolution.DAY));
    // System.out.println(DateTools.dateToString(new Date(), Resolution.MINUTE));
    // System.out.println(DateTools.dateToString(new Date(), Resolution.SECOND));
    // }

    /**
     * Wildcard query.
     *
     * '?' stands for exactly one character, '*' for zero or more characters.
     *
     * name:房*
     *
     * name:*o*
     *
     * name:roo?
     */
    @Test
    public void testWildcardQuery() {
        // Alternatives: "ro*" (a prefix query, see PrefixQuery), "*o*", "房*".
        Term pattern = new Term("name", "roo?");

        queryAndPrintResult(new WildcardQuery(pattern));
    }

    /**
     * Phrase query.
     *
     * content:"? 绅士 ? ? 饭店"
     *
     * content:"绅士 饭店"~2
     */
    @Test
    public void testPhraseQuery() {
        PhraseQuery phrase = new PhraseQuery();
        // Explicit positions state that two terms sit between the two words
        // (positions 2 and 3); positions are relative, only the gap matters:
        // phrase.add(new Term("content", "绅士"), 1);
        // phrase.add(new Term("content", "饭店"), 4);

        // When the exact gap is unknown but an upper bound is, omit the
        // position argument (positions then default to 0, 1, 2, ...) ...
        phrase.add(new Term("content", "绅士"));
        phrase.add(new Term("content", "饭店"));
        // ... and set the maximum number of terms allowed between the two.
        phrase.setSlop(2);

        queryAndPrintResult(phrase);
    }

    /**
     * Boolean query combining a phrase clause and a range clause.
     *
     * +content:"绅士 饭店"~2 -size:[000000000000dw TO 000000000000rs]
     *
     * +content:"绅士 饭店"~2 +size:[000000000000dw TO 000000000000rs]
     *
     * content:"绅士 饭店"~2 size:[000000000000dw TO 000000000000rs]
     *
     * +content:"绅士 饭店"~2 size:[000000000000dw TO 000000000000rs]
     */
    @Test
    public void testBooleanQuery() {
        // Clause 1: the two words at most two terms apart.
        PhraseQuery phraseClause = new PhraseQuery();
        phraseClause.add(new Term("content", "绅士"));
        phraseClause.add(new Term("content", "饭店"));
        phraseClause.setSlop(2);

        // Clause 2: size between 500 and 1000, bounds included.
        Term lower = new Term("size", NumberTools.longToString(500));
        Term upper = new Term("size", NumberTools.longToString(1000));
        Query rangeClause = new RangeQuery(lower, upper, true);

        // Combination: clause 1 is required, clause 2 is optional.
        BooleanQuery combined = new BooleanQuery();
        combined.add(phraseClause, Occur.MUST);
        combined.add(rangeClause, Occur.SHOULD);

        queryAndPrintResult(combined);
    }

    /**
     * Query-string syntax: '+' means MUST, '-' means MUST_NOT, and no prefix
     * means SHOULD. The AND / OR / NOT keywords can be used as well.
     */
    @Test
    public void testQueryString() {
        // Other combinations to try:
        // "+content:\"绅士 饭店\"~2 -size:[000000000000dw TO 000000000000rs]"
        // "content:\"绅士 饭店\"~2 AND size:[000000000000dw TO 000000000000rs]"
        // "content:\"绅士 饭店\"~2 OR size:[000000000000dw TO 000000000000rs]"
        // "(content:\"绅士 饭店\"~2 NOT size:[000000000000dw TO 000000000000rs])"
        // "-content:\"绅士 饭店\"~2 AND -size:[000000000000dw TO 000000000000rs]"
        // "-content:\"绅士 饭店\"~2 OR -size:[000000000000dw TO 000000000000rs]"
        String queryString = "-content:\"绅士 饭店\"~2 NOT -size:[000000000000dw TO 000000000000rs]";

        printResult(indexDao.search(queryString, 0, 10));
    }
}


7、lucene的工具方法

package cn.hj.lucene.utils;
import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;

/**
 * Helpers that convert a file into a Lucene {@link Document} and print a
 * document that came back from a search.
 */
public class File2DocumentUtils {

    /**
     * Converts a file into a Document with the fields "name", "content",
     * "size" and "path".
     *
     * <p>If the file cannot be read, the stack trace is printed and the
     * returned document has no "content" field.
     *
     * @param path location of the file to convert
     * @return the document describing the file
     */
    public static Document file2Document(String path) {
        File file = new File(path);
        Document doc = new Document();
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        try {
            doc.add(new Field("content", FileUtils.readFileToString(file), Store.YES, Index.ANALYZED));
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Store the file's size (not the length of the path string), encoded
        // with NumberTools so that it has a fixed width: range filters and
        // queries compare the values as strings, and printDocument decodes
        // the value with NumberTools.stringToLong.
        doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
        // The path must be indexed as a single un-analyzed term, otherwise
        // new Term("path", ...) can never match it for delete or update.
        doc.add(new Field("path", path, Store.YES, Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Prints the "name", "content", "size" and "path" values of a search hit.
     * The size is decoded from its NumberTools string form before printing.
     *
     * @param doc the document to print
     */
    public static void printDocument(Document doc) {
        // A value can also be read through the Field object:
        //		Field field = doc.getField("name");
        //		String name = field.stringValue();
        // Here the shorter doc.get(name) form is used.
        System.out.println("----------------------------------");
        System.out.println("name    = " + doc.get("name"));
        System.out.println("content = " + doc.get("content"));
        System.out.println("size    = " + NumberTools.stringToLong(doc.get("size")));
        System.out.println("path    = " + doc.get("path"));
    }
}


工程代码:http://download.csdn.net/detail/wxwzy738/5248905
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: