您的位置:首页 > 其它

第一个lucene例子

2009-11-20 18:18 337 查看
目前需要解决的问题是:解决英文单字模糊查询

package com.test.search;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.Token;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.compass.core.util.reader.StringReader;

public class LuceneTest {
/**
 * Root directory of the on-disk indexes. {@code search} and {@code addIndex}
 * append the index type (for example "student", "class" or "school") to this
 * path as a sub-directory. The field is static, so it is shared by every
 * instance of this class.
 */
private static String indexFilePath = "e://lucene//";

//private static String indexFilePathClass = "e://lucene//class//";
//private static String indexFilePathSchool = "e://lucene//school";

/**
 * Creates a helper rooted at the given index directory.
 *
 * <p>The path is stored in a static field, so constructing a new instance
 * changes the root directory used by all instances.
 *
 * @param filepath root directory under which the indexes are stored
 */
public LuceneTest(String filepath) {
    // Static field: assign through the class name instead of "this" so the
    // shared nature of the value is visible at the assignment site.
    LuceneTest.indexFilePath = filepath;
}

/**
 * Demo entry point: searches the "class" index for a single keyword and prints
 * a completion message. Any failure is printed as a stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        LuceneTest demo = new LuceneTest(indexFilePath);

        // Build the sample indexes first (only needed once):
        //demo.initIndex();

        // Other sample searches. Several words may be separated by spaces.
        // The "student" one still needs the tokenization issue solved; only
        // whole-word lookups work for it at the moment.
        //demo.search("student", "w");
        //demo.search("school","东");
        demo.search("class", "一");

        // Delete a record:
        // indexWriter.deleteDocuments(new Term("title","1"));

        // Add a record:
        // indexWriter.addDocument(document);

        // Update a record:
        // indexWriter.updateDocument(new Term("title","1"), document);

        System.out.println("检索完毕!");
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}

/**
 * Runs a wildcard query against the "title" field of one index and prints the
 * title, content and url of each hit to standard output.
 *
 * <p>The index is read from the sub-directory {@code searchType} of the index
 * root. The keyword is wrapped as {@code *keyword*} and is not passed through
 * an analyzer.
 * NOTE(review): getWriter indexes with StandardAnalyzer, which presumably
 * lower-cases tokens, so an upper-case keyword may never match - confirm.
 *
 * @param searchType index category, also the sub-directory name (student, class or school)
 * @param serchWord keyword to look for inside the title terms
 * @throws Exception if the index cannot be opened or searched
 */
public void search(String searchType, String serchWord) throws Exception {
    Directory directory = new SimpleFSDirectory(new File(indexFilePath
            + File.separator + searchType));

    // Read-only searcher over the single index directory, wrapped in a
    // MultiSearcher (which supports several directories; only one is used here).
    IndexSearcher indexSearcher = new IndexSearcher(directory, true);
    IndexSearcher indexSearchers[] = { indexSearcher };
    Searcher searcher = new MultiSearcher(indexSearchers);
    try {
        // Only the title field is queried; the multi-field parser variants that
        // were tried earlier are no longer used.
        Query query = new WildcardQuery(new Term("title", "*" + serchWord + "*"));

        int startindex = 0; // offset of the first hit to print; hook for paging
        int maxpage = 300; // maximum number of hits printed per page

        // Request enough hits to cover the whole page starting at startindex
        TopDocs h = searcher.search(query, startindex + maxpage);

        // Never read past the hits that were actually returned
        int endindex = Math.min(startindex + maxpage, h.scoreDocs.length);
        for (int i = startindex; i < endindex; i++) {
            Document doc = searcher.doc(h.scoreDocs[i].doc);
            System.out.println(doc.get("title"));
            System.out.println(doc.get("content"));
            System.out.println(doc.get("url"));
        }
    } finally {
        // Release the underlying index reader even when the search fails
        searcher.close();
    }
}

/**
 * Closes the given writer. Any exception raised while closing is printed as a
 * stack trace and not propagated to the caller.
 *
 * @param indexWriter writer to close
 */
public void closeIndex(IndexWriter indexWriter) {
    try {
        indexWriter.close();
    } catch (Exception closeFailure) {
        // Best effort: report the problem and carry on
        closeFailure.printStackTrace();
    }
}

/**
 * Seeds the student, class and school indexes with sample records by calling
 * {@code addIndex} once per record.
 *
 * <p>Intended for the first start of the system; afterwards the indexes live
 * on disk and only need updates or deletions.
 *
 * @return always {@code null}
 */
public String initIndex() {
    // One row per record, in indexing order: title, content, url, index type
    String[][] seedRecords = {
        // students
        { "wenfujun", "文付军和文亚龙: congratulations on your success!", "http://wenfujun_url", "student" },
        { "wenbo", "文博和文昊: congratulations on your success!", "http://wenbo_url", "student" },
        // classes
        { "05界一班", "一年级一班!", "http://wenfujun_url", "class" },
        { "03界二班", "一年级二班", "http://wenbo_url", "class" },
        // schools
        { "山东省东明县第二高级中学", "东明二中是个很不错的中学,是我的母校", "http://wenfujun_url", "school" },
        { "山东省菏泽市第一中高级中学", "山东菏泽一中的升学率非常高,教学质量那个高啊!!!", "http://wenbo_url", "school" },
    };

    for (String[] record : seedRecords) {
        // The success flag returned by addIndex is ignored
        addIndex(record[0], record[1], record[2], record[3]);
    }
    return null;
}

/**
 * Deletes every document whose "title" field contains the term "1" from the
 * index stored directly under the index root.
 *
 * <p>Failures are printed as a stack trace and not propagated.
 *
 * @return always {@code null}
 */
public String deleteIndex() {
    IndexWriter indexWriter = null;
    try {
        indexWriter = this.getWriter(indexFilePath);
        indexWriter.deleteDocuments(new Term("title", "1"));
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Closing the writer commits the deletion and releases the write lock;
        // without it the index stays locked for later writers.
        if (indexWriter != null) {
            closeIndex(indexWriter);
        }
    }
    return null;
}

/**
 * Replaces the documents whose "title" field contains the term "1" with a new
 * document built from the given values, in the index stored directly under the
 * index root.
 *
 * <p>Failures are printed as a stack trace and not propagated.
 *
 * @param title value of the stored, analyzed "title" field
 * @param content value of the stored, analyzed "content" field
 * @param url value of the stored, non-indexed "url" field
 * @return always {@code null}
 */
public String updateIndex(String title, String content, String url) {
    // A Document is comparable to a database row, a Field to a column
    Document document = new Document();
    // ANALYZED makes the field searchable; Field.Index.NO stores it only
    Field titleField = new Field("title", title, Field.Store.YES,
            Field.Index.ANALYZED);
    Field contentField = new Field("content", content, Field.Store.YES,
            Field.Index.ANALYZED);
    Field urlField = new Field("url", url, Field.Store.YES, Field.Index.NO);
    document.add(urlField);
    document.add(contentField);
    document.add(titleField);

    IndexWriter indexWriter = null;
    try {
        indexWriter = this.getWriter(indexFilePath);
        indexWriter.updateDocument(new Term("title", "1"), document);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Closing the writer commits the update and releases the write lock
        if (indexWriter != null) {
            closeIndex(indexWriter);
        }
    }
    return null;
}

/**
 * Adds one document to the index of the given type, then optimizes and closes
 * that index.
 *
 * @param title value of the stored, analyzed "title" field
 * @param content value of the stored, analyzed "content" field
 * @param url value of the stored, non-indexed "url" field
 * @param indexType index category, also the sub-directory name (student, class or school)
 * @return {@code true} if the document was added and the writer closed cleanly,
 *         {@code false} if any step failed (the stack trace is printed)
 */
public boolean addIndex(String title, String content, String url,
        String indexType) {
    boolean added = false;
    IndexWriter indexWriter = null;
    try {
        // A Document is comparable to a database row, a Field to a column
        Document document = new Document();
        // ANALYZED makes the field searchable; Field.Index.NO stores it only
        Field titleField = new Field("title", title, Field.Store.YES,
                Field.Index.ANALYZED);
        Field contentField = new Field("content", content, Field.Store.YES,
                Field.Index.ANALYZED);
        Field urlField = new Field("url", url, Field.Store.YES,
                Field.Index.NO);
        document.add(urlField);
        document.add(contentField);
        document.add(titleField);

        indexWriter = this.getWriter(indexFilePath + File.separator
                + indexType);
        indexWriter.addDocument(document);
        indexWriter.optimize();
        added = true;
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Always close the writer so a failed add does not leave the index
        // write-locked; a failing close still counts as a failed add.
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (Exception e) {
                e.printStackTrace();
                added = false;
            }
        }
    }
    return added;
}

/**
 * Reports whether a Lucene index exists in the given directory.
 *
 * <p>If the check itself fails, the stack trace is printed and {@code true}
 * is returned.
 *
 * @param indexDir path of the directory to inspect
 * @return the result of the existence check, or {@code true} when the check
 *         threw an exception
 */
public static boolean indexExist(String indexDir) {
    try {
        File location = new File(indexDir);
        return IndexReader.indexExists(new SimpleFSDirectory(location));
    } catch (Exception checkFailure) {
        checkFailure.printStackTrace();
        // Historical default: a failed check is reported as "exists"
        return true;
    }
}

/**
 * Opens an IndexWriter on the given directory using a StandardAnalyzer.
 *
 * <p>A new index is created only when the directory does not already contain
 * one; otherwise documents are appended to the existing index. The caller is
 * responsible for closing the returned writer.
 *
 * @param indexFilePath index directory; callers that work per type have
 *        already appended student, class or school to it
 * @return a writer on that directory
 * @throws CorruptIndexException if the existing index is corrupt
 * @throws LockObtainFailedException if another writer holds the index lock
 * @throws IOException on any other low-level I/O failure
 */
private IndexWriter getWriter(String indexFilePath)
        throws CorruptIndexException, LockObtainFailedException,
        IOException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    //Analyzer analyzer = new ChineseAnalyzer();
    Directory directory = new SimpleFSDirectory(new File(indexFilePath));

    // Ask Lucene whether an index is present instead of probing for the
    // segments.gen file: creating over an existing index would erase it.
    boolean create = !IndexReader.indexExists(directory);

    return new IndexWriter(directory, analyzer, create,
            new IndexWriter.MaxFieldLength(999999999));
}

/**
 * Rebuilds the index at e://lucene// from scratch with two sample documents.
 *
 * <p>No longer used; {@code addIndex} has replaced it.
 *
 * @throws Exception if the index cannot be created or written
 */
public void index() throws Exception {
    Directory directory = new SimpleFSDirectory(new File("e://lucene//"));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // create=true: wipe or create the index, then close straight away
    IndexWriter writer1 = new IndexWriter(directory, analyzer, true,
            new IndexWriter.MaxFieldLength(25000));
    writer1.close();

    // create=false: append to the index that was just initialized
    IndexWriter writer2 = new IndexWriter(directory, analyzer, false,
            new IndexWriter.MaxFieldLength(25000));
    try {
        // First document. Store.YES keeps the raw value in the index,
        // Index.ANALYZED makes it searchable.
        Document doc = new Document();
        Field articleTitle = new Field("title", "title:文付军和文亚龙",
                Field.Store.YES, Field.Index.ANALYZED);
        doc.add(articleTitle);
        Field articleText = new Field("content",
                "content:文付军和文亚龙: congratulations on your success!",
                Field.Store.YES, Field.Index.ANALYZED);
        doc.add(articleText);
        // further fields could be added here
        writer2.addDocument(doc);

        // Second document, built the same way
        doc = new Document();
        articleTitle = new Field("title", "title:文博和文昊", Field.Store.YES,
                Field.Index.ANALYZED);
        doc.add(articleTitle);
        articleText = new Field("content",
                "content:文博和文昊: congratulations on your success!",
                Field.Store.YES, Field.Index.ANALYZED);
        doc.add(articleText);
        writer2.addDocument(doc);
        // further documents could be added here
    } finally {
        // Close even when adding a document fails, so the write lock is freed
        writer2.close();
    }
}

/**
 * Searches the index under the index root for documents whose "title" matches
 * {@code keyword} and whose "type" matches {@code type}, and prints the title
 * and type of each hit to standard output.
 *
 * <p>Currently not used. Does nothing when either argument is null or empty.
 * Failures are printed as a stack trace and not propagated.
 *
 * @param type value that the "type" field must match
 * @param keyword value that the "title" field must match
 */
public void executeSearch(String type, String keyword) {
    if (type == null || "".equals(type) || keyword == null
            || keyword.equals("")) {
        return;
    }

    IndexSearcher searcher = null;
    try {
        // Query strings and the fields they apply to, position by position
        String key[] = { keyword, type };
        String fields[] = { "title", "type" };

        // Both conditions are required (logical AND)
        BooleanClause.Occur flags[] = { BooleanClause.Occur.MUST,
                BooleanClause.Occur.MUST };
        //ChineseAnalyzer analyzer = new ChineseAnalyzer();
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

        Query query = MultiFieldQueryParser.parse(key, fields, flags,
                analyzer);

        searcher = new IndexSearcher(indexFilePath);

        int startindex = 0; // offset of the first hit to print; hook for paging
        int maxpage = 30; // maximum number of hits printed per page

        // Request enough hits to cover the whole page starting at startindex
        TopDocs h = searcher.search(query, startindex + maxpage);

        // Never read past the hits that were actually returned
        int endindex = Math.min(startindex + maxpage, h.scoreDocs.length);
        for (int i = startindex; i < endindex; i++) {
            Document doc = searcher.doc(h.scoreDocs[i].doc);
            System.out.println(doc.get("title"));
            System.out.println(doc.get("type"));
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release the index reader whether or not the search succeeded
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

/*
* public ArrayList<BaseItem> getDate(String sql) throws SQLException {
* ArrayList<BaseItem> item = new ArrayList<BaseItem>(); ConnBase dataConn =
* new ConnBase();//数据库连接 conn = dataConn.DBconn(); ps =
* conn.prepareStatement(sql); rs = ps.executeQuery(); //
* jdbcTemplate.execute(sql); while (rs.next()) { BaseItem i = new
* BaseItem(); i.setTitle(rs.getString("title")); // 对应你的Blog表里的title
* i.setContent(rs.getString("content")); // 取表里的博客内容
* i.setUr("SingleArticle_lucene.action?id=" + rs.getInt("blogId")); // 如 a.
* action ?id=8 item.add(i); } // 把数据库里的数据取出来 return item; }
*/

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: