您的位置:首页 > 编程语言 > PHP开发

php通过调用lucene库实现检索应用

2016-03-05 22:10 609 查看
php通过调用lucene库实现检索应用。注意为下面安装的php,tomcat,java,apache添加path路径。

被检索的数据存放在mysql数据库中。这些数据是使用python从网络上爬取的,已存储为json格式,可以直接用java读取并插入数据库。

用到的jar包。

1、gson-2.2.1.jar(用于直接将json格式转化为java对象)

2、lucene-core-3.0.2.jar(lucene库)

3、mysql-connector-java-5.1.37-bin.jar (java连接mysql驱动库)

4、LuceneExample.jar(自己编写的使用Lucene的示例库)

第一步:

安装Java,配置好环境变量。将上面的jar包拷贝到java虚拟机运行环境jre\lib\ext目录下,这样在虚拟机运行时就会自动加载这些库了。

第二步:

安装tomcat。由于php调用java需要用到php-java-bridge,而下载下来的是JavaBridge.war,所以需要用tomcat将JavaBridge.war解压(部署)一下。具体方法为:将JavaBridge.war放在tomcat的webapps\目录,启动tomcat,此时tomcat会自动解压webapps\下的JavaBridge.war,生成JavaBridge文件夹,将这个文件夹拷贝到第三步的apache运行目录(htdocs\)。

第三步:

安装php,安装apache,将第二步中得到的JavaBridge文件夹拷贝到apache的htdocs\目录。(JavaBridge文件夹里包含一些类似头文件的东西,例如php端需要引用的java\Java.inc)

第四步:

环境已经基本建立好了,下面开始具体实施。

1、登录到mysql终端,利用SQL指令建立tiku数据库,建立math数据库表。

create database tiku;

use tiku;

create table math(index_num int(11) primary key not null auto_increment,question text not null,answer text);

建完表后可以 desc math; 查看一下。

2、终端中进入加载数据代码的目录D:\soft\yangyang\luc,数据文件为out.data,为json格式。

编译LoadData.java并执行,

javac LoadData.java

java LoadData

此时在终端中查看math数据库,即可看到数据已存入数据库表math中。

select * from math limit 2;

3、编译生成自己编写的使用Lucene的示例库

同样在上述目录中执行下面命令

javac LuceneExample.java 编译生成class文件

jar -cvf LuceneExample.jar LuceneExample.class 打包class文件

接着将LuceneExample.jar文件拷贝至上面说的jre\lib\ext目录下。

4、编写服务文件

进入apache运行目录htdocs\编写test.php文件来调用上面的LuceneExample.jar库实现检索。具体代码见附录

第五步:

开启服务环境,查看运行结果:

1、首先保证开启mysql服务,可以在cmd中运行命令 net start mysql

2、进入第三步的JavaBridge\WEB-INF\lib目录下,双击运行JavaBridge.jar程序(如果双击无法运行,可在cmd中进入该目录手动执行 java -jar JavaBridge.jar),在弹出的对话框中选择8080端口,确定。

3、进入apache的bin目录下双击ApacheMonitor.exe开启apache服务。

4、此时在浏览器中输入 http://localhost/test.php 即可看到查询结果(若为乱码,请将浏览器的编码方式修改为UTF-8)。

LoadData.java源码

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import com.google.gson.reflect.TypeToken;
import java.lang.reflect.Type;

import com.google.gson.Gson;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.List;

/**
 * Command-line loader that copies question/answer records from a JSON file
 * into the {@code math} table of the {@code tiku} MySQL database.
 *
 * <p>The input file must contain a single JSON array of objects with the keys
 * {@code "Q"} (question) and {@code "A"} (answer), encoded as UTF-8.
 */
public class LoadData {

    private static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
    private static final String JDBC_URL = "jdbc:mysql://localhost:3306/tiku";
    private static final String JDBC_USER = "root";
    private static final String JDBC_PASSWORD = "buptmm";
    private static final String INSERT_SQL = "insert into math(question,answer) values(?,?)";

    /**
     * Parses {@code fileName} as a JSON array of {@link TiMu} records and inserts
     * every record into the {@code math} table.
     *
     * <p>Any failure (missing file, malformed JSON, database error) is reported on
     * standard error via a stack trace and aborts the load; nothing is thrown to
     * the caller.
     *
     * @param fileName path of the UTF-8 encoded JSON file to load
     */
    public static void loadJson(String fileName) {
        try {
            List<TiMu> records = readRecords(fileName);
            // Gson yields null for an empty document; treat it like an empty array.
            if (records == null || records.isEmpty()) {
                System.out.println("no records found in " + fileName);
                return;
            }

            Class.forName(JDBC_DRIVER);
            // try-with-resources closes the statement and the connection even when an insert fails.
            try (Connection conn = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
                 PreparedStatement insert = conn.prepareStatement(INSERT_SQL)) {
                int inserted = 0;
                for (TiMu record : records) {
                    if (record == null) {
                        // A JSON null element deserializes to a null record; nothing to insert.
                        continue;
                    }
                    insert.setString(1, record.getQ());
                    insert.setString(2, record.getA());
                    inserted += insert.executeUpdate();
                }
                System.out.println(inserted + " record(s) inserted");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole file (not just its first line) and deserializes it into a list.
     * Streaming from the reader lets the JSON span any number of lines.
     */
    private static List<TiMu> readRecords(String fileName) throws Exception {
        Type listType = new TypeToken<List<TiMu>>() {}.getType();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(fileName), "utf-8"))) {
            return new Gson().fromJson(reader, listType);
        }
    }

    /**
     * Entry point: loads {@code out.data} from the current working directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        LoadData.loadJson("out.data");
    }
}

/**
 * A single question/answer pair.
 *
 * <p>The field names {@code Q} and {@code A} are deliberately upper-case: they
 * mirror the keys used in the JSON data file that is deserialized into this
 * class, so they must not be renamed.
 */
public class TiMu {

    /** Question text. */
    private String Q;

    /** Answer text. */
    private String A;

    /** Creates an empty record; both fields start out as {@code null}. */
    public TiMu() {
        this(null, null);
    }

    /**
     * Creates a fully populated record.
     *
     * @param q question text
     * @param a answer text
     */
    public TiMu(String q, String a) {
        this.Q = q;
        this.A = a;
    }

    /** @return the question text, possibly {@code null} */
    public String getQ() {
        return this.Q;
    }

    /** @return the answer text, possibly {@code null} */
    public String getA() {
        return this.A;
    }

    /** @param Q new question text */
    public void setQ(String Q) {
        this.Q = Q;
    }

    /** @param A new answer text */
    public void setA(String A) {
        this.A = A;
    }
}


LuceneExample.java源码

import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 3.0 demo: indexes the {@code question} column of the
 * {@code tiku.math} MySQL table, finds the best-matching question for a
 * keyword, and looks up the stored answer for that question.
 *
 * <p>All public methods report failures by printing a stack trace and
 * returning an empty result; they never throw checked exceptions, which keeps
 * them easy to call through php-java-bridge.
 */
public class LuceneExample {

    /**
     * Directory that holds the Lucene index files. The path is relative, so it
     * resolves against the working directory of the JVM that runs this class.
     */
    public static final File INDEX_DIRECTORY = new File("./");

    private static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
    private static final String JDBC_URL = "jdbc:mysql://localhost:3306/tiku";
    private static final String JDBC_USER = "root";
    private static final String JDBC_PASSWORD = "buptmm";

    /** Name of the only indexed field; also the column it is read from. */
    private static final String FIELD_QUESTION = "question";

    /** Loads the MySQL driver and opens a connection to the {@code tiku} database. */
    private static Connection openConnection() throws Exception {
        Class.forName(JDBC_DRIVER);
        return DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
    }

    /**
     * Rebuilds the index from scratch with one document per row of
     * {@code math}, storing and analyzing the question text.
     *
     * <p>The writer and directory are closed in {@code finally} blocks so the
     * index write lock is released even when indexing fails part-way.
     */
    public void createIndex() {

        System.out.println("-- Indexing --");

        try (Connection conn = openConnection();
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("select question from math")) {

            Directory directory = new SimpleFSDirectory(INDEX_DIRECTORY);
            try {
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
                // create=true discards any existing index in INDEX_DIRECTORY.
                IndexWriter iWriter = new IndexWriter(directory, analyzer, true, MaxFieldLength.UNLIMITED);
                try {
                    int count = 0;
                    while (rs.next()) {
                        String question = rs.getString(FIELD_QUESTION);
                        if (question == null) {
                            // Field rejects null values; a row without text has nothing to index.
                            continue;
                        }
                        Document doc = new Document();
                        doc.add(new Field(FIELD_QUESTION, question, Field.Store.YES, Field.Index.ANALYZED));
                        iWriter.addDocument(doc);
                        count++;
                    }

                    System.out.println(count+" record indexed");

                    iWriter.optimize();
                    iWriter.commit();
                } finally {
                    iWriter.close();
                }
            } finally {
                directory.close();
            }

        } catch (Exception e) {
            e.printStackTrace();
        }

    }

    /**
     * Runs {@code keyword} through the Lucene query parser against the
     * {@code question} field and returns the stored text of the best hit.
     *
     * @param keyword query in Lucene query-parser syntax (operators such as
     *                {@code AND}/{@code OR} and wildcards are honoured)
     * @return the best-matching question, or {@code ""} when the keyword is
     *         null/blank, nothing matches, or the search fails
     */
    public String search(String keyword) {

        System.out.println("-- Seaching --");
        if (keyword == null || keyword.trim().isEmpty()) {
            return "";
        }

        String result = "";
        try {
            Directory directory = FSDirectory.open(INDEX_DIRECTORY);
            try {
                IndexReader reader = IndexReader.open(directory, true);
                try {
                    // A searcher built on an existing reader does not own it,
                    // so the reader is closed explicitly below.
                    IndexSearcher searcher = new IndexSearcher(reader);
                    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
                    QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_QUESTION, analyzer);

                    Query query = parser.parse(keyword);
                    System.out.println("query >> " + keyword);

                    // Only the best hit is used, so there is no need to collect more.
                    TopDocs hits = searcher.search(query, 1);

                    System.out.println("Results found >> " + hits.totalHits);

                    if (hits.scoreDocs.length > 0) {
                        Document doc = searcher.doc(hits.scoreDocs[0].doc);
                        System.out.println(doc.get(FIELD_QUESTION));
                        result = doc.get(FIELD_QUESTION);
                    }
                } finally {
                    reader.close();
                }
            } finally {
                directory.close();
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Looks up the answer stored for an exact question text.
     *
     * <p>The question is bound as a statement parameter, so text containing
     * quotes or other SQL metacharacters is matched literally.
     *
     * @param que exact question text, typically the value returned by
     *            {@link #search(String)}
     * @return the answer of the first matching row, or {@code ""} when
     *         {@code que} is null/empty, no row matches, the stored answer is
     *         NULL, or the lookup fails
     */
    public String getResult(String que){
        if (que == null || que.equals("")) {
            return "";
        }
        String sql = "select answer from math where question = ?";
        System.out.println("sql = " + sql);
        try (Connection conn = openConnection();
             PreparedStatement stmt = conn.prepareStatement(sql)) {
            stmt.setString(1, que);
            try (ResultSet rs = stmt.executeQuery()) {
                if (rs.next()) {
                    String answer = rs.getString("answer");
                    return answer == null ? "" : answer;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "";
    }

    /**
     * Demo entry point: rebuilds the index, searches for {@code "5"} and
     * prints the matched question followed by its answer.
     *
     * @param args ignored
     */
    public static void main(String[] args)  {

        LuceneExample obj = new LuceneExample();

        obj.createIndex();

        String question = obj.search("5");
        System.out.println("a1: " + question);
        System.out.println("a1: " + obj.getResult(question));

    }
}


test.php源码

<?php
// Demo page: asks the Java-side LuceneExample for the question that best
// matches a fixed keyword and prints that question together with its answer.

// Declare the encoding up front so the browser does not have to guess it.
header('Content-Type: text/html; charset=utf-8');

require_once("JavaBridge/java/Java.inc");

$tf = new Java('LuceneExample');

// Rebuilds the whole index from MySQL on every request.
$tf->createIndex();

// java_values() converts the Java result into a native PHP value.
$q = java_values($tf->search("王阿姨"));

// The texts come from scraped data, so escape them before embedding in HTML.
print "题目:" . htmlspecialchars((string)$q, ENT_QUOTES, 'UTF-8');
print "<br/>";

// Pass the raw (unescaped) question: the lookup needs the exact stored text.
$a = java_values($tf->getResult($q));
print "答案:" . htmlspecialchars((string)$a, ENT_QUOTES, 'UTF-8');
?>


json数据

[{"Q": "王阿姨买了3千克龙眼和8千克西瓜一共花了46元。已知1千克西瓜的价钱正好是1千克龙眼的1/5。龙眼和西瓜的单价分别是多少元?(5分)", "A": "设1千克西瓜的价钱是x,那么龙眼的价钱是5x,3*5x+8x=46,x=2,所以龙眼的单价是10元,西瓜的单价是2元"},{"Q": "有13个乒乓球,有12个质量相同,另有一个较轻一点,如果用天平称,至少称 次保证能找出这个乒乓球.", "A": "http://www.tiku.cn/q/1010405.html"}, {"Q": "有9瓶钙片,次品的一瓶少了4片.用天平至少称 次可以保证找出次品.", "A": "http://www.tiku.cn/q/1010406.html"}]
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: