您的位置:首页 > 编程语言

sphinx实时搜索代码

2015-12-07 22:32 295 查看
Sphinx 实时索引的效率很高,上千万条记录也能在不到一秒的时间内完成搜索,因此可以把它作为数据库查询的预处理:例如先用 Sphinx 搜索出新闻的 Id,再根据这些 Id 到数据库中取出记录并分页显示。对于因搜索导致并发很高、或搜索使数据库性能成为瓶颈的网站,基本上都可以这样处理,以减少数据库的查询压力。我们都知道,数据库中以通配符开头的 LIKE 模糊查询(例如 LIKE '%关键词%')是不走索引的,只能全表扫描,数据量达到几百万条时效率可能就很低了。

这里是一个实例,需要的话请参考,谢谢您的关注。

首先添加一个配置文件 rt_article.conf

# Real-time (RT) index for articles. Documents are pushed in at runtime with
# SphinxQL INSERT statements (see sphinx_article.php) instead of being pulled
# from a data source.
index rt_article
{
    type = rt
    rt_mem_limit = 512M
    path = /data/sphinx/data/rt_article
    docinfo = extern
    #charset_type = utf-8
    morphology = none
    min_word_len = 1
    min_prefix_len = 0

    # Strip HTML markup before indexing; drop <style>/<script> elements together
    # with their contents.
    html_strip = 1
    html_remove_elements = style, script

    # Full-text fields: these are what MATCH() / @field queries search.
    rt_field = articleName
    rt_field = tags
    rt_field = content

    # Attributes returned with each match. The string attributes receive the
    # same text as the three full-text fields above (sphinx_article.php inserts
    # each value twice) so the original text can be read back from results.
    rt_attr_timestamp = announceTime
    rt_attr_string = title
    rt_attr_string = keywords
    rt_attr_string = newsContent

    # Index every character listed in ngram_chars as a 1-character n-gram, so
    # unsegmented CJK text is searchable.
    ngram_len = 1

    # Earlier alternatives, kept for reference:
    #ngram_chars = 0..9, _, a..z, U+3000..U+2FA1F
    #charset_table = 0..9, A..Z->a..z, _, a..z, U+3000..U+2FA1F
    #ngram_chars = 0..9, _, a..z, U+00A0..U+2FFFF
    #charset_table = 0..9, A..Z->a..z, _, a..z, U+00A0..U+2FFFF

    # One logical value: every line except the last must end in a backslash
    # and must NOT be followed by a blank line, otherwise the value is cut short.
    ngram_chars = U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF,\
        U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF,\
        U+3040..U+309F, U+30A0..U+30FF, U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF,\
        U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF, 0..9, _, a..z
}

# Daemon settings for searchd, which serves the rt_article index.
searchd
{
# Port 9312 is the one sphinx_test.php reaches through SphinxClient::SetServer().
listen = 9312
# Port 9306 speaks the MySQL wire protocol (mysql41), so any MySQL client can
# send SphinxQL; sphinx_article.php connects here.
listen = 9306:mysql41

# Daemon log and per-query log locations.
log = /usr/local/sphinx/var/log/searchd.log
query_log = /usr/local/sphinx/var/log/query.log

# Network timeouts. NOTE(review): units are presumably seconds; confirm against
# the Sphinx documentation for the installed version.
read_timeout = 5
client_timeout = 300

# Caps on concurrent workers and on persistent client connections.
max_children = 30
persistent_connections_limit = 30

pid_file = /usr/local/sphinx/var/log/searchd.pid

# Index rotation / file handling options.
seamless_rotate = 1
preopen_indexes = 1
unlink_old = 1
# Per-request size and complexity limits.
mva_updates_pool = 1M
max_packet_size = 8M
max_filters = 256

max_filter_values = 4096
max_batch_queries = 32
workers = threads # for RT to work

# Directory where searchd keeps its binary log files.
binlog_path = /usr/local/sphinx/var/data
}

然后 ./bin/searchd --config=/usr/local/sphinx/etc/rt_article.conf

启动 sphinx 服务后,可以先写一段代码向索引中插入一些记录(实际应用中,这些记录是从 mysql 读取后写入的)。

先插入一些数据,然后再测试查询 sphinx_article.php,内容如下:

<?php

// sphinx_article.php
// Smoke test for the rt_article real-time index: insert one document over
// SphinxQL, then run a full-text MATCH() query and dump every hit.
//
// NOTE(review): this script uses the legacy ext/mysql API (mysql_*), which was
// removed in PHP 7.0; port it to mysqli or PDO before running it on PHP 7+.

// searchd accepts MySQL-protocol clients on port 9306 (listen = 9306:mysql41).
$link = mysql_connect('127.0.0.1:9306');

if (! $link) {
    // Single failure path: say that the connection failed and why.
    die('can not connect server: ' . mysql_errno() . ': ' . mysql_error() . "\n");
}

$id = 27;
$articleName = '学习linux吧,最好是lnmp方面的知识';
$tags = 'linux,nginx,mysql,php,python';
$content = '本文介绍的Linux定制专栏文章关于Gnome 3桌面,这种桌面与Xfce和KDE大不一样。作者在本文中介绍了配置和定制Gnome外壳的方法,但由于Gnome 3采用了模块化设计方式,还可以把Gnome外壳完全换成别的系统,这种系统提供了全然不同的桌面,配置和定制方面有着全然不同的可能性。';
$announceTime = time();

// Escape every string before it is embedded in the statement: real article
// text (the data normally comes from MySQL) will contain quotes and backslashes.
$escapedName = mysql_real_escape_string($articleName, $link);
$escapedTags = mysql_real_escape_string($tags, $link);
$escapedContent = mysql_real_escape_string($content, $link);

// Each text value is written twice: once into the full-text field that gets
// indexed, once into the string attribute that is returned with the match.
// INSERT fails when the id already exists, so re-running the script unchanged
// reports a duplicate-id error; use REPLACE INTO to overwrite instead.
$sql = sprintf(
    "insert into rt_article(id,articleName,tags,content,title,keywords,newsContent,announceTime)"
    . " values(%d,'%s','%s','%s','%s','%s','%s',%d)",
    $id,
    $escapedName,
    $escapedTags,
    $escapedContent,
    $escapedName,
    $escapedTags,
    $escapedContent,
    $announceTime
);

if (! mysql_query($sql, $link)) {
    echo '未能执行成功!' . "\n";
    echo mysql_errno($link) . ': ' . mysql_error($link) . "\n";
    echo $sql;
    mysql_close($link);
    exit(1);
}

$query = mysql_query("select * from rt_article where match('配置');", $link);

if (! $query) {
    echo mysql_errno($link) . ': ' . mysql_error($link) . "\n";
    mysql_close($link);
    exit(1);
}

// mysql_fetch_assoc() returns each row keyed by column name. The previous call,
// mysql_fetch_row($query, MYSQL_ASSOC), passed an argument that function does
// not accept, so it failed and no row was ever printed.
while ($row = mysql_fetch_assoc($query)) {
    var_dump($row);
}

mysql_close($link);

?>

运行该脚本,如果能正确查询出刚插入的那条数据,就说明没有问题了。

然后测试一下搜索:

文件 sphinx_test.php。注意这里引用了 Sphinx 自带的 sphinxapi.php,我把它改名为 sphinx_client.php。

这里假定搜索框中的关键词是用空格分开的,然后测试一下搜索吧。

<?php

// sphinx_test.php
// Tag search demo against the rt_article index.
//  - With POSTed keywords (whitespace separated): find the articles whose tags
//    match ALL keywords; if fewer than 3 are found, top the list up with the
//    articles matching ANY keyword.
//  - Without keywords: list documents, newest id first.
// Intermediate results are echoed as debug output (111 / 222 / 333 lines).

include('sphinx_client.php');

$sphinx_client = new SphinxClient();
$sphinx_client->SetServer('192.168.1.192', 9312);

//$sphinx_client->SetArrayResult(TRUE);
//$sphinx_client->SetIDRange(1,20);
//$sphinx_client->SetFilter('group_id',[1,2]);

$sphinx_client->SetLimits(0, 100);
$sphinx_client->SetFieldWeights(['articleName' => 3, 'tags' => 2, 'content' => 1]);

// Split on any run of whitespace and drop empty pieces, so double spaces or
// leading/trailing blanks never produce an empty "@tags ()" query.
$rawKeywords = (isset($_POST['keywords']) && is_string($_POST['keywords'])) ? $_POST['keywords'] : '';
$keywords = preg_split('/\s+/u', $rawKeywords, -1, PREG_SPLIT_NO_EMPTY);
if ($keywords === false) {
    // preg_split() fails on input that is not valid UTF-8.
    $keywords = [];
}

if (count($keywords) > 0) {
    // "@tags (...)", quoted terms and "|" are extended query syntax, so make
    // sure the query is parsed in extended mode.
    // NOTE(review): older sphinxapi.php versions default to SPH_MATCH_ALL - confirm.
    $sphinx_client->SetMatchMode(SPH_MATCH_EXTENDED2);

    // Best match first, then newest id. The built-in sort keys are @weight and
    // @id; the previous '$id' was a literal dollar sign inside single quotes.
    $sphinx_client->SetSortMode(SPH_SORT_EXTENDED, '@weight DESC, @id DESC');

    $quotedTerms = [];
    $matchingAll = null; // null = no keyword processed yet

    foreach ($keywords as $keyword) {
        // Escape query operators in user input. HTML-encoding the input (as
        // before) corrupts the search text; HTML escaping belongs on output.
        $escaped = $sphinx_client->EscapeString($keyword);
        $quotedTerms[] = '"' . $escaped . '"';

        $res = $sphinx_client->Query('@tags (' . $escaped . ')', 'rt_article');
        $ids = isset($res['matches']) ? array_keys($res['matches']) : [];

        echo htmlspecialchars($keyword, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . json_encode($ids) . '<br/>';

        // A keyword without hits must empty the intersection; it must neither
        // be skipped nor let the next keyword start the intersection over.
        $matchingAll = ($matchingAll === null) ? $ids : array_intersect($matchingAll, $ids);
    }

    $result = array_values($matchingAll);
    echo '111,result:' . json_encode($result) . '<br/>';

    if (count($result) < 3) {
        // Too few exact hits: add articles matching any keyword. implode()
        // builds the OR list without a dangling " | " at the end.
        $res = $sphinx_client->Query('@tags (' . implode(' | ', $quotedTerms) . ')', 'rt_article');

        if (isset($res['matches'])) {
            $ret = array_keys($res['matches']);
            echo '222,ret:' . json_encode($ret) . '<br/>';
            $result = array_unique(array_merge($result, $ret));
        }
    }

    echo '333,result:' . json_encode(array_values($result)) . '<br/>';

    echo '<pre>';
    echo htmlspecialchars((string) $sphinx_client->GetLastError(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo htmlspecialchars((string) $sphinx_client->GetLastWarning(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo '</pre>';
} else {
    $sphinx_client->SetSortMode(SPH_SORT_EXTENDED, '@id DESC');
    $res = $sphinx_client->Query('', 'rt_article');

    echo '<pre>';
    // Query() returns false on failure, so 'matches' may be missing.
    if (isset($res['matches'])) {
        print_r($res['matches']);
    }
    echo htmlspecialchars((string) $sphinx_client->GetLastError(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo htmlspecialchars((string) $sphinx_client->GetLastWarning(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo '</pre>';
}

/**
 * Full-text search over rt_article for articles matching ANY of the keywords.
 *
 * Builds the extended query `"kw1" | "kw2" | ...`, sorts by weight and then by
 * id (both descending), prints the matches plus the client's last error and
 * warning inside a <pre> block, and returns the matches.
 *
 * @param array       $keywords      Keywords; blank entries are ignored.
 * @param object|null $sphinx_client SphinxClient to use. When omitted, the
 *                                   global $sphinx_client is used (functions do
 *                                   not see global variables implicitly, so the
 *                                   old body always hit an undefined variable).
 *
 * @return array The 'matches' entry of the Sphinx result, or [] when there are
 *               no keywords, no matches, or the query failed.
 *
 * @throws InvalidArgumentException When no client object is available.
 */
function searchArticle($keywords, $sphinx_client = null)
{
    if ($sphinx_client === null && isset($GLOBALS['sphinx_client'])) {
        $sphinx_client = $GLOBALS['sphinx_client'];
    }
    if (! is_object($sphinx_client)) {
        throw new InvalidArgumentException('searchArticle() needs a SphinxClient instance');
    }

    // Quote each keyword as a phrase and escape query operators inside it.
    $terms = [];
    foreach ((array) $keywords as $value) {
        $value = trim((string) $value);
        if ($value !== '') {
            $terms[] = '"' . $sphinx_client->EscapeString($value) . '"';
        }
    }
    if (count($terms) === 0) {
        return [];
    }

    // implode() leaves no trailing separator. The query is sent as-is: the
    // single quotes that used to be wrapped around it are not query syntax.
    $str_query = implode(' | ', $terms);
    echo '[' . htmlspecialchars($str_query, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . ']';

    // Quoted phrases and "|" are extended query syntax.
    $sphinx_client->SetMatchMode(SPH_MATCH_EXTENDED2);

    // Best match first, then newest id. '@id' is the sort key; the previous
    // '$id' was a literal dollar sign inside single quotes.
    $sphinx_client->SetSortMode(SPH_SORT_EXTENDED, '@weight DESC, @id DESC');

    $res = $sphinx_client->Query($str_query, 'rt_article');
    $matches = isset($res['matches']) ? $res['matches'] : [];

    echo '<pre>';
    if (count($matches) > 0) {
        print_r($matches);
    }
    echo htmlspecialchars((string) $sphinx_client->GetLastError(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo htmlspecialchars((string) $sphinx_client->GetLastWarning(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo '</pre>';

    return $matches;
}

/**
 * Recommend article ids by tag.
 *
 * First collects the ids whose tags match EVERY keyword (one query per
 * keyword, results intersected). When fewer than $min_results ids are found,
 * the ids matching ANY keyword are appended (duplicates removed). Intermediate
 * results are echoed as debug output (111 / 222 / 333 lines).
 *
 * @param array       $keywords      Keywords; blank entries are ignored.
 * @param object|null $sphinx_client SphinxClient to use. When omitted, the
 *                                   global $sphinx_client is used (functions do
 *                                   not see global variables implicitly, so the
 *                                   old body always hit an undefined variable).
 * @param int         $min_results   Run the "any keyword" fallback when the
 *                                   "all keywords" list is shorter than this.
 *
 * @return array List of document ids, "all keywords" matches first.
 *
 * @throws InvalidArgumentException When no client object is available.
 */
function getRecommend($keywords, $sphinx_client = null, $min_results = 3)
{
    if ($sphinx_client === null && isset($GLOBALS['sphinx_client'])) {
        $sphinx_client = $GLOBALS['sphinx_client'];
    }
    if (! is_object($sphinx_client)) {
        throw new InvalidArgumentException('getRecommend() needs a SphinxClient instance');
    }

    // "@tags (...)", quoted terms and "|" are extended query syntax.
    $sphinx_client->SetMatchMode(SPH_MATCH_EXTENDED2);

    $quotedTerms = [];
    $matchingAll = null; // null = no keyword processed yet

    foreach ((array) $keywords as $value) {
        $value = trim((string) $value);
        if ($value === '') {
            continue;
        }

        $escaped = $sphinx_client->EscapeString($value);
        $quotedTerms[] = '"' . $escaped . '"';

        $res = $sphinx_client->Query('@tags (' . $escaped . ')', 'rt_article');
        $ids = isset($res['matches']) ? array_keys($res['matches']) : [];

        echo htmlspecialchars($value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . json_encode($ids) . '<br/>';

        // A keyword without hits must empty the intersection; it must neither
        // be skipped nor let the next keyword start the intersection over.
        $matchingAll = ($matchingAll === null) ? $ids : array_intersect($matchingAll, $ids);
    }

    if ($matchingAll === null) {
        // No usable keyword: nothing to recommend and nothing to query.
        return [];
    }

    $result = array_values($matchingAll);
    echo '111,result:' . json_encode($result) . '<br/>';

    if (count($result) < $min_results) {
        // implode() builds the OR list without a dangling " | " at the end.
        $res = $sphinx_client->Query('@tags (' . implode(' | ', $quotedTerms) . ')', 'rt_article');

        if (isset($res['matches'])) {
            $ret = array_keys($res['matches']);
            echo '222,ret:' . json_encode($ret) . '<br/>';
            $result = array_unique(array_merge($result, $ret));
        }
    }

    $result = array_values($result);
    echo '333,result:' . json_encode($result) . '<br/>';

    return $result;
}

?>

<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <title>健康资讯详情分享版</title>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <!-- NOTE(review): user-scalable=no prevents pinch-zoom; consider removing it for accessibility. -->
    <meta name="viewport" content="initial-scale=1.0, user-scalable=no" />
    <meta name="apple-mobile-web-app-capable" content="yes" />
    <meta name="apple-mobile-web-app-status-bar-style" content="black" />
    <meta name="format-detection" content="telephone=no" />
</head>
<body>
    <div style="width:100%">
        <!-- Posts the keywords back to sphinx_test.php, which reads $_POST['keywords']. -->
        <form action="sphinx_test.php" method="post">
            <!-- aria-label gives the field an accessible name; a placeholder alone is not one. -->
            <input type="text" name="keywords" value="" placeholder="请输入关键字,中间以空格分开" aria-label="请输入关键字,中间以空格分开" />
            <input type="submit" name="btnSubmit" value="搜索" />
        </form>
    </div>
</body>
</html>
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: