您的位置:首页 > 其它

全文检索Lucene(四)---Compass框架

2017-07-12 21:51 176 查看
Compass是一个强大的、支持事务的、高性能的对象/搜索引擎映射(OSEM:Object/Search Engine Mapping)框架,同时也是一个Java持久层框架。

Compass之于Lucene,就像Hibernate之于JDBC,Compass就是把Lucene封装了一层。

Compass目前版本是2.2.0,已经很久没有更新与维护。Compass 2.2版本所对应的Lucene版本是2.4.1。

下载地址:http://www.compass-project.org/

解压后的目录结构:



开发所需的基本jar包可以在dist及其子目录下找到,主要如下:



代码示例:

Article.java

package com.my.bean;

import org.compass.annotations.Index;
import org.compass.annotations.Searchable;
import org.compass.annotations.SearchableBoostProperty;
import org.compass.annotations.SearchableId;
import org.compass.annotations.SearchableProperty;
import org.compass.annotations.Store;

/**
 * Article entity that Compass maps into the search index through its OSEM annotations.
 */
@Searchable
public class Article {

    // A @SearchableId cannot be queried by default; once the name parameter is given,
    // the property can be used in queries.
    // For numeric properties, a format made of zeros fixes the stored length;
    // shorter values are left-padded with '0'.
    @SearchableId(name = "id", format = "00000000")
    private Integer id;

    // Title text: stored in the index and analyzed.
    @SearchableProperty(name = "title", store = Store.YES, index = Index.ANALYZED)
    private String title;

    // Body text: stored in the index and analyzed.
    @SearchableProperty(name = "content", store = Store.YES, index = Index.ANALYZED)
    private String content;

    // Boost value of this article; starts at 1F.
    @SearchableBoostProperty
    private float boostValue = 1F;

    // ---- id ----

    /** @return the article id */
    public Integer getId() {
        return id;
    }

    /** @param id the article id */
    public void setId(Integer id) {
        this.id = id;
    }

    // ---- title ----

    /** @return the article title */
    public String getTitle() {
        return title;
    }

    /** @param title the article title */
    public void setTitle(String title) {
        this.title = title;
    }

    // ---- content ----

    /** @return the article content */
    public String getContent() {
        return content;
    }

    /** @param content the article content */
    public void setContent(String content) {
        this.content = content;
    }

    // ---- boost ----

    /** @return the boost value of this article */
    public float getBoostValue() {
        return boostValue;
    }

    /** @param boostValue the boost value of this article */
    public void setBoostValue(float boostValue) {
        this.boostValue = boostValue;
    }

}


HelloWorld.java

package com.my.compass;

import java.util.ArrayList;
import java.util.List;

import org.compass.core.Compass;
import org.compass.core.CompassHits;
import org.compass.core.CompassSession;
import org.compass.core.CompassTransaction;
import org.compass.core.config.CompassConfiguration;
import org.junit.Test;

import com.my.bean.Article;

/**
 * Minimal Compass example: index one {@link Article}, then search the index.
 */
public class HelloWorld {

    private CompassConfiguration cfg = new CompassConfiguration().configure();
    private Compass compassSessionFactory = cfg.buildCompass();

    /**
     * Builds the index entry for one article (simulates an article that was just
     * posted and saved to the database, and now has to become searchable).
     *
     * @throws Exception if indexing fails; the transaction is rolled back first
     */
    @Test
    public void createIndex() throws Exception {
        // Simulate a record that has just been saved to the database
        Article article = new Article();
        article.setId(1);
        article.setTitle("Lucene是全文检索的框架");
        article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");

        // Build the index
        CompassSession session = compassSessionFactory.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.create(article); // create the index entry
            tx.commit();
        } catch (Exception e) {
            // tx is still null when beginTransaction() itself failed
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            // Always release the session, even when indexing failed
            session.close();
        }
    }

    /**
     * Searches the index and prints every hit.
     *
     * @throws Exception if the search fails; the transaction is rolled back first
     */
    @Test
    public void search() throws Exception {
        // Search condition (alternative used by the author: "lucene")
        String queryString = "compass";

        // Run the search and collect the results
        List<Article> list = new ArrayList<Article>();
        CompassSession session = compassSessionFactory.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            CompassHits hits = session.find(queryString);
            // hits.length() is the total number of results; hits.score(i) gives the score of hit i
            for (int i = 0; i < hits.length(); i++) {
                Article article = (Article) hits.data(i);
                list.add(article);
            }

            tx.commit();
        } catch (Exception e) {
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }

        // Show the results
        System.out.println("总结果数量为:" + list.size());
        for (Article article : list) {
            System.out.println("--------> id = " + article.getId());
            System.out.println("title  = " + article.getTitle());
            System.out.println("content= " + article.getContent());
        }
    }
}


compass.cfg.xml

<?xml version="1.0" encoding="utf-8"?>
<compass-core-config xmlns="http://www.compass-project.org/schema/core-config"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.compass-project.org/schema/core-config http://www.compass-project.org/schema/compass-core-config-2.2.xsd">
    <compass name="default">
        <!-- Connection information: file-system index directory -->
        <connection>
            <file path="./indexDir/" />
        </connection>

        <!-- Mapping declarations -->
        <mappings>
            <class name="com.my.bean.Article" />
        </mappings>

        <!-- Other settings -->
        <settings>
            <!-- Highlighter: prefix. '<' must be escaped inside an XML attribute value. -->
            <setting name="compass.engine.highlighter.default.formatter.simple.pre" value="&lt;span class='keyword'&gt;" />
            <!-- Highlighter: suffix -->
            <setting name="compass.engine.highlighter.default.formatter.simple.post" value="&lt;/span&gt;" />
            <!-- Highlighter: fragment (summary) size -->
            <setting name="compass.engine.highlighter.default.fragmenter.simple.size" value="20" />

            <!-- Analyzer (tokenizer) -->
            <setting name="compass.engine.analyzer.default.type" value="jeasy.analysis.MMAnalyzer" />
        </settings>

    </compass>
</compass-core-config>


Compass增删改查

CompassUtils.java

package com.my.utils;

import org.compass.core.Compass;
import org.compass.core.CompassSession;
import org.compass.core.config.CompassConfiguration;

/**
 * Holds the single shared {@link Compass} instance and hands out sessions from it.
 */
public class CompassUtils {

    // Built once, when this class is initialized, through the no-argument configure()
    private static final Compass compassSessionFactory =
            new CompassConfiguration().configure().buildCompass();

    /**
     * Exposes the shared Compass instance.
     *
     * @return the Compass instance built during class initialization
     */
    public static Compass getCompassSessionFactory() {
        return compassSessionFactory;
    }

    /**
     * Opens a new CompassSession and returns it.
     *
     * @return a session opened from the shared Compass instance
     */
    public static CompassSession openSession() {
        return getCompassSessionFactory().openSession();
    }

}


ArticleIndexDao.java

package com.my.compass;

import java.util.ArrayList;
import java.util.List;

import org.compass.core.CompassHits;
import org.compass.core.CompassSession;
import org.compass.core.CompassTransaction;

import com.my.bean.Article;
import com.my.bean.QueryResult;
import com.my.utils.CompassUtils;

/**
 * Index-side DAO for {@link Article}: create, delete, update and paged search,
 * each executed in its own Compass session and transaction.
 */
public class ArticleIndexDao {

    /**
     * Creates the index entry for an article (saves it to the index store).
     *
     * @param article the article to index
     * @throws RuntimeException wrapping any failure; the transaction is rolled back first
     */
    public void save(Article article) {
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.create(article); // build the index entry
            tx.commit();
        } catch (Exception e) {
            rollbackIfStarted(tx);
            throw new RuntimeException(e);
        } finally {
            session.close();
        }
    }

    /**
     * Deletes the index entry of the article with the given id.
     *
     * @param id the article id
     * @throws RuntimeException wrapping any failure; the transaction is rolled back first
     */
    public void delete(Integer id) {
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.delete(Article.class, id); // remove the index entry
            tx.commit();
        } catch (Exception e) {
            rollbackIfStarted(tx);
            throw new RuntimeException(e);
        } finally {
            session.close();
        }
    }

    /**
     * Updates the index entry of an article.
     *
     * @param article the article whose index entry is refreshed
     * @throws RuntimeException wrapping any failure; the transaction is rolled back first
     */
    public void update(Article article) {
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.save(article); // update the index entry
            tx.commit();
        } catch (Exception e) {
            rollbackIfStarted(tx);
            throw new RuntimeException(e);
        } finally {
            session.close();
        }
    }

    /**
     * Paged search.
     *
     * @param queryString the query string handed to {@code session.find}
     * @param firstResult index of the first hit to return
     * @param maxResults  maximum number of hits to return
     * @return total number of hits plus the list holding one page of data
     * @throws RuntimeException wrapping any failure; the transaction is rolled back first
     */
    public QueryResult search(String queryString, int firstResult, int maxResults) {
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            // Run the query and get the intermediate result
            CompassHits hits = session.find(queryString);
            int count = hits.length();

            // Exclusive end of the page, computed in long so that a very large
            // maxResults (e.g. Integer.MAX_VALUE) cannot overflow into a negative bound
            long endIndex = Math.min((long) firstResult + (long) maxResults, (long) count);

            List<Article> list = new ArrayList<Article>();
            for (int i = firstResult; i < endIndex; i++) { // take only one slice of the hits
                Article article = (Article) hits.data(i);

                // Highlighting works on one property at a time; when the property value
                // does not contain the searched keyword, the result is null
                String text = hits.highlighter(i).fragment("content");
                if (text != null) {
                    article.setContent(text); // replace the original content with the highlighted text
                }

                list.add(article);
            }
            tx.commit();

            return new QueryResult(count, list);
        } catch (Exception e) {
            rollbackIfStarted(tx);
            throw new RuntimeException(e);
        } finally {
            session.close();
        }
    }

    /**
     * Rolls back the transaction if one was actually started. When
     * {@code beginTransaction()} itself failed, {@code tx} is still null and
     * there is nothing to roll back.
     */
    private static void rollbackIfStarted(CompassTransaction tx) {
        if (tx == null) {
            return;
        }
        try {
            tx.rollback();
        } catch (RuntimeException ignored) {
            // Deliberately ignored: the caller is about to rethrow the original
            // failure, which must not be masked by a secondary rollback error.
        }
    }
}


基于CompassTemplate的增删改查

package com.my.compass;

import java.util.ArrayList;
import java.util.List;

import org.compass.core.CompassCallback;
import org.compass.core.CompassException;
import org.compass.core.CompassHits;
import org.compass.core.CompassSession;
import org.compass.core.CompassTemplate;

import com.my.bean.Article;
import com.my.bean.QueryResult;
import com.my.utils.CompassUtils;

/**
 * Index-side DAO for {@link Article} built on {@link CompassTemplate}: the same
 * operations as a hand-written session/transaction DAO, delegated to the template.
 */
public class ArticleIndexDao2 {

    /**
     * Creates the index entry for an article (saves it to the index store).
     *
     * @param article the article to index
     */
    public void save(Article article) {
        newTemplate().create(article);
    }

    /**
     * Deletes the index entry of the article with the given id.
     *
     * @param id the article id
     */
    public void delete(Integer id) {
        newTemplate().delete(Article.class, id);
    }

    /**
     * Updates the index entry of an article.
     *
     * @param article the article whose index entry is refreshed
     */
    public void update(Article article) {
        newTemplate().save(article);
    }

    /**
     * Paged search.
     *
     * @param queryString the query string handed to {@code session.find}
     * @param firstResult index of the first hit to return
     * @param maxResults  maximum number of hits to return
     * @return total number of hits plus the list holding one page of data
     */
    public QueryResult search(final String queryString, final int firstResult, final int maxResults) {
        return newTemplate().execute(new CompassCallback<QueryResult>() {
            public QueryResult doInCompass(CompassSession session) throws CompassException {
                // Run the query and get the intermediate result
                CompassHits hits = session.find(queryString);
                int count = hits.length();

                // Exclusive end of the page, computed in long so that a very large
                // maxResults (e.g. Integer.MAX_VALUE) cannot overflow into a negative bound
                long endIndex = Math.min((long) firstResult + (long) maxResults, (long) count);

                List<Article> list = new ArrayList<Article>();
                for (int i = firstResult; i < endIndex; i++) { // take only one slice of the hits
                    Article article = (Article) hits.data(i);

                    // Highlighting works on one property at a time; when the property value
                    // does not contain the searched keyword, the result is null
                    String text = hits.highlighter(i).fragment("content");
                    if (text != null) {
                        article.setContent(text); // replace the original content with the highlighted text
                    }

                    list.add(article);
                }
                return new QueryResult(count, list);
            }
        });
    }

    /** Creates a template bound to the shared Compass instance. */
    private static CompassTemplate newTemplate() {
        return new CompassTemplate(CompassUtils.getCompassSessionFactory());
    }
}


查询示例:

package com.my.compass;

import java.util.ArrayList;
import java.util.List;

import org.compass.core.CompassHits;
import org.compass.core.CompassQuery;
import org.compass.core.CompassQueryBuilder.CompassMultiPhraseQueryBuilder;
import org.compass.core.CompassSession;
import org.compass.core.CompassTransaction;
import org.junit.Test;

import com.my.bean.Article;
import com.my.utils.CompassUtils;

/**
 * Shows the query kinds offered by the Compass query builder. Only the final
 * boolean query is executed; query3..query8 are built purely as illustrations.
 */
public class QueryTest {

    /**
     * Builds one query of each kind, runs the boolean combination and prints the hits.
     *
     * @throws Exception if the search fails; the transaction is rolled back first
     */
    @Test
    public void search() throws Exception {
        List<Article> list = new ArrayList<Article>();

        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            // Match-all query
            CompassQuery query1 = session.queryBuilder().matchAll();

            // Term (keyword) query
            CompassQuery query2 = session.queryBuilder().term("title", "lucene");

            // Range query
            CompassQuery query3 = session.queryBuilder().between("id", 5, 15, true);

            // Wildcard query
            CompassQuery query4 = session.queryBuilder().wildcard("title", "luc*n?");

            // Fuzzy query
            CompassQuery query5 = session.queryBuilder().fuzzy("title", "lucenx", 0.8F);

            // Phrase query, built step by step with an explicit position per term
            CompassMultiPhraseQueryBuilder multiPhraseQueryBuilder = session.queryBuilder().multiPhrase("title");
            multiPhraseQueryBuilder.add("lucene", 0);
            multiPhraseQueryBuilder.add("框架", 3);
            CompassQuery query6 = multiPhraseQueryBuilder.toQuery();

            // The same phrase query written as one call chain
            CompassQuery query7 = session.queryBuilder().multiPhrase("title")
                    .add("lucene", 0)
                    .add("框架", 3)
                    .toQuery();

            // Phrase query using a slop instead of positions
            CompassQuery query8 = session.queryBuilder().multiPhrase("title")
                    .add("lucene")
                    .add("框架")
                    .setSlop(5) // the terms are at most 5 apart
                    .toQuery();

            // Boolean query. Available clauses:
            //   addMust(q)    - must match
            //   addMustNot(q) - must not match
            //   addShould(q)  - several Should clauses together behave as OR
            CompassQuery query = session.queryBuilder().bool()
                    .addMust(query1)
                    .addMust(query2)
                    .toQuery();

            CompassHits hits = query.hits();
            for (int i = 0; i < hits.length(); i++) {
                Article article = (Article) hits.data(i);
                list.add(article);
            }

            tx.commit();
        } catch (Exception e) {
            // tx is still null when beginTransaction() itself failed
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            // Always release the session, even when the query failed
            session.close();
        }

        // Show the results
        System.out.println("总结果数量为:" + list.size());
        for (Article article : list) {
            System.out.println("--------> id = " + article.getId());
            System.out.println("title  = " + article.getTitle());
            System.out.println("content= " + article.getContent());
        }
    }
}


过滤示例:

package com.my.compass;

import java.util.ArrayList;
import java.util.List;

import org.compass.core.CompassHits;
import org.compass.core.CompassQuery;
import org.compass.core.CompassQueryFilter;
import org.compass.core.CompassSession;
import org.compass.core.CompassTransaction;
import org.junit.Test;

import com.my.bean.Article;
import com.my.utils.CompassUtils;

/**
 * Shows how to restrict a query-string search with a {@link CompassQueryFilter}.
 */
public class FilterTest {

    /**
     * Searches for "lucene", filters the hits by an id range and prints them.
     *
     * @throws Exception if the search fails; the transaction is rolled back first
     */
    @Test
    public void search() throws Exception {
        // Search condition
        String queryString = "lucene";

        List<Article> list = new ArrayList<Article>();
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            // Built through the query builder rather than session.find(queryString),
            // so that a filter can be attached before the hits are fetched
            CompassQuery query = session.queryBuilder().queryString(queryString).toQuery();

            // Filter condition: id between 1 and 15
            CompassQueryFilter filter = session.queryFilterBuilder().between("id", 1, 15, true, true);
            query.setFilter(filter);

            CompassHits hits = query.hits();
            for (int i = 0; i < hits.length(); i++) {
                Article article = (Article) hits.data(i);
                list.add(article);
            }

            tx.commit();
        } catch (Exception e) {
            // tx is still null when beginTransaction() itself failed
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            // Always release the session, even when the query failed
            session.close();
        }

        // Show the results
        System.out.println("总结果数量为:" + list.size());
        for (Article article : list) {
            System.out.println("--------> id = " + article.getId());
            System.out.println("title  = " + article.getTitle());
            System.out.println("content= " + article.getContent());
        }
    }
}


排序示例:

package com.my.compass;

import java.util.ArrayList;
import java.util.List;

import org.compass.core.CompassHits;
import org.compass.core.CompassQuery;
import org.compass.core.CompassQuery.SortDirection;
import org.compass.core.CompassSession;
import org.compass.core.CompassTransaction;
import org.junit.Test;

import com.my.bean.Article;
import com.my.utils.CompassUtils;

/**
 * Shows two ways to influence result order: a per-document boost set at
 * indexing time, and an explicit sort added to the query.
 */
public class SortTest {

    /**
     * Indexes one article whose boost value is raised from the default 1F to 2F.
     *
     * @throws Exception if indexing fails; the transaction is rolled back first
     */
    @Test
    public void createIndex() throws Exception {
        Article article = new Article();
        article.setId(27);
        article.setTitle("LuceneUtil与Lucene是全文检索的框架");
        article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");
        article.setBoostValue(2F); // the default is 1F

        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.create(article); // create the index entry
            tx.commit();
        } catch (Exception e) {
            // tx is still null when beginTransaction() itself failed
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }
    }

    /**
     * Plain search with no explicit sort; prints the hits in the order returned.
     *
     * @throws Exception if the search fails; the transaction is rolled back first
     */
    @Test
    public void search() throws Exception {
        // Search condition
        String queryString = "lucene";

        List<Article> list;
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            list = toArticles(session.find(queryString));
            tx.commit();
        } catch (Exception e) {
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }

        printArticles(list);
    }

    /**
     * Search with an explicit sort on the id property, in reverse direction.
     *
     * @throws Exception if the search fails; the transaction is rolled back first
     */
    @Test
    public void search2() throws Exception {
        // Search condition
        String queryString = "lucene";

        List<Article> list;
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            // Built through the query builder rather than session.find(queryString),
            // so that a sort can be attached before the hits are fetched
            CompassQuery query = session.queryBuilder().queryString(queryString).toQuery();

            // query.addSort("id") would sort by id ascending; REVERSE sorts by id descending
            query.addSort("id", SortDirection.REVERSE);

            list = toArticles(query.hits());
            tx.commit();
        } catch (Exception e) {
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }

        printArticles(list);
    }

    /** Copies every hit, in order, into a list of articles. */
    private static List<Article> toArticles(CompassHits hits) {
        List<Article> list = new ArrayList<Article>();
        for (int i = 0; i < hits.length(); i++) {
            Article article = (Article) hits.data(i);
            list.add(article);
        }
        return list;
    }

    /** Prints the result count followed by id, title and content of each article. */
    private static void printArticles(List<Article> list) {
        System.out.println("总结果数量为:" + list.size());
        for (Article article : list) {
            System.out.println("--------> id = " + article.getId());
            System.out.println("title  = " + article.getTitle());
            System.out.println("content= " + article.getContent());
        }
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息