您的位置:首页 > 编程语言 > PHP开发

php实现简明英汉词典

2014-11-11 22:17 162 查看
通过读取txt格式的词库,写入memcache缓存。比较简单,基本不存在分词的问题:

前端如下:

<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<script type="text/javascript">
/**
 * Opens the given URL in a fixed-size popup without toolbars or scrollbars.
 *
 * @param {string} newWindow URL to load in the popup.
 */
function openWindow(newWindow)
{
	// The feature list has to be comma-separated; the original mixed in a
	// ';' and misspelled "toolbar" / "resizable", so those entries were
	// silently ignored by the browser.
	var features = [
		"height=460",
		"width=740",
		"top=100",
		"left=350",
		"toolbar=no",
		"menubar=no",
		"scrollbars=no",
		"resizable=no",
		"location=no",
		"status=no"
	].join(", ");

	// Plain ASCII window name: the original name was mis-encoded text, and
	// older IE versions reject names that are not alphanumeric/underscore.
	window.open(newWindow, "cibaQuery", features);
}

// Register a handler; the original called openWindow() right away and stored
// its (undefined) return value in window.onload, so nothing ran on load.
window.onload=function()
{
	openWindow("view.php");
};
</script>
</head>

<body>
</body>
</html>


弹出的窗口如下:

<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />

<script type="text/javascript">

/**
 * Creates an AJAX request object for the current browser.
 *
 * @returns {Object|null} a new request object, or null when the browser
 *          offers neither XMLHttpRequest nor the legacy ActiveX control.
 */
function getXMLHttpRequest()
{
	// Prefer the standard object; only very old IE lacks it.
	if (window.XMLHttpRequest)
	{
		return new window.XMLHttpRequest();
	}

	if (window.ActiveXObject)
	{
		try
		{
			return new window.ActiveXObject("Microsoft.XMLHttp");
		}
		catch (e)
		{
			// ActiveX exists but could not be instantiated; fall through to null.
		}
	}

	// Callers test the result for truthiness before using it.
	return null;
}

/**
 * Sends the content of the #enWord field to /ciba/process.php and shows
 * "<word>: <response>" in #chWord once the request completes with status 200.
 * The input field is cleared as soon as the request has been prepared.
 */
function query()
{
	var url="/ciba/process.php";
	var input=$('enWord');
	var en=input.value;

	// Encode the word so that '&', '#', '+', spaces and non-ASCII text reach
	// the server intact; the random value defeats browser caching.
	var data="?enword="+encodeURIComponent(en)+"&rand="+Math.random();
	input.value="";

	// Local variable: a shared global would be clobbered by overlapping requests.
	var xmlhttp=getXMLHttpRequest();
	if (!xmlhttp)
	{
		return;
	}

	xmlhttp.open("get", url+data, true);
	xmlhttp.onreadystatechange=function()
	{
		if (xmlhttp.readyState==4 && xmlhttp.status==200)
		{
			// Render as text, not markup: both the typed word and the
			// response are untrusted and must not be parsed as HTML.
			var out=$("chWord");
			while (out.firstChild)
			{
				out.removeChild(out.firstChild);
			}
			out.appendChild(document.createTextNode(en+": "+xmlhttp.responseText));
		}
	};
	xmlhttp.send(null);
}
/**
 * Key handler for the input field: pressing Enter submits the query.
 *
 * @param {Object} event keyboard event (falls back to window.event when absent).
 * @returns {boolean} false when Enter was handled, true otherwise.
 */
function sendQuery(event)
{
	event = event || window.event;
	var code = event.keyCode || event.which;

	if (code==13)
	{
		// The inline handler does not propagate our return value, so cancel
		// the default action explicitly; otherwise Enter leaves a newline in
		// the field that query() has just cleared.
		if (event.preventDefault)
		{
			event.preventDefault();
		}
		else
		{
			event.returnValue=false;
		}
		query();
		return false;
	}
	return true;
}
/**
 * Click handler for the input field: removes the placeholder hint.
 *
 * Text the user has typed is left alone, so clicking to move the caret
 * no longer wipes the field.
 *
 * @returns {boolean} always false.
 */
function clearContent()
{
	var box=$("enWord");
	// The markup puts the hint on its own line, so ignore surrounding whitespace.
	var current=box.value.replace(/^\s+|\s+$/g, "");
	if (current=="请输入词条")
	{
		box.value="";
	}
	return false;
}
/**
 * Shorthand for document.getElementById.
 *
 * @param {string} id value of the element's id attribute.
 * @returns {*} whatever document.getElementById returns for that id.
 */
function $(id)
{
	var element = document.getElementById(id);
	return element;
}
</script>

<style rel="stylesheet" type="text/css">
#enWord{border:1px solid blue;
		margin-top:20px;
		}
#sendButton{position:relative;
			bottom:12px;}
#chWord{position:relative;
		left:10px;
		top:50px;
		font-size:20px;
		color:red;}
</style>
</head>

<body>

<img src="logo.jpg" /><br />
<textarea  id="enWord" cols="90" rows="2" onkeydown="sendQuery(event);" onclick="clearContent();">
请输入词条
</textarea>
<input type="button" id="sendButton" value="查询" onclick="query();" /><br />

<div id="chWord"></div>
</body>
</html>


【控制器部分】

<?php
// Controller: looks up the word passed as ?enword=... in the dictionary store,
// echoes its translation as plain text and appends a record of the request
// to aword.txt.

// "text/plain" is the registered MIME type; "plain/text" is not recognised.
header("content-type: text/plain; charset=utf-8");
require_once "storeWord.php";

// Only a non-blank string is a valid query; ?enword[]=x would otherwise hand
// an array to the string functions used by the store.
$raw=isset($_GET['enword']) ? $_GET['enword'] : null;
if(is_string($raw) && trim($raw)!=='')
{
	$mem=new MemStore();
	// getWord() terminates the request itself when the word is unknown.
	$ch=$mem->getWord($raw);
	$en=$mem->filterWord($raw);

	// Escape both values so one odd entry cannot break the XML-style log line.
	$res="<res><en>".htmlspecialchars((string)$en, ENT_QUOTES, 'UTF-8')."</en>"
		."<ch>".htmlspecialchars((string)$ch, ENT_QUOTES, 'UTF-8')."</ch></res>";
	// LOCK_EX keeps concurrent requests from interleaving their log lines.
	file_put_contents('aword.txt', $res."\r\n", FILE_APPEND | LOCK_EX);
	//ob_clean();
	echo $ch;
}
else
{
	file_put_contents('aword.txt', "receive NON data \r\n", FILE_APPEND | LOCK_EX);
}


【后台】

<?php   

require_once('parseWord.php');
/**
 * Memcache-backed store for English => Chinese dictionary entries.
 *
 * Keys are English words as produced by filterWord(); values are the
 * translations. Every failure ends the request with die() and a short
 * message, which is how errors reach the caller of the controller.
 */
class MemStore
{
	/** Longest key, in bytes, that filterWord() returns. */
	const MAX_WORD_LENGTH = 20;

	/** Returned by filterWord() when the input contains no English word. */
	const NO_WORD = ' ';

	private $mem=null;

	/**
	 * Connects to the memcached server on 127.0.0.1:11211; dies on failure.
	 */
	public function __construct()
	{
		$this->mem=new Memcache();
		$this->mem->connect("127.0.0.1", 11211) or die("connect memcached failed!!!<br />");
	}

	public function __destruct()
	{
		// Nothing to close if the client object was never created.
		if($this->mem)
		{
			$this->mem->close();
		}
	}

	/**
	 * Loads a dictionary file through the Word parser and adds every entry
	 * to the cache with a ten-day lifetime.
	 *
	 * @param string $dic path of the dictionary text file.
	 */
	public function addWord($dic)
	{
		$word=new Word($dic);
		$word->readWord();
		foreach($word->getWord() as $en => $ch)
		{
			// PHP converts numeric-looking array keys to integers.
			$en=(string)$en;
			// Skip entries without a usable key or translation instead of
			// aborting the whole import on them.
			if($en==='' || $ch===null)
			{
				continue;
			}
			$this->mem->add($en, $ch, MEMCACHE_COMPRESSED, time()+10*24*3600) or die("添加词条失败". __LINE__ ."<br />");
		}
	}

	/**
	 * Overwrites the translation of a word that is already cached.
	 *
	 * @param string $en English word (filtered before use).
	 * @param mixed  $ch new translation.
	 */
	public function setWord($en,$ch)
	{
		$en=$this->requireExisting($en);
		$this->mem->set($en, $ch, MEMCACHE_COMPRESSED, time()+31*24*3600) or die("添加词条$en失败");
	}

	/**
	 * Returns the cached translation of a word; dies when it is not cached.
	 *
	 * @param string $en English word (filtered before use).
	 * @return mixed the stored translation.
	 */
	public function getWord($en)
	{
		$en=$this->filterWord($en);
		$ch=$this->mem->get($en);
		// Compare with false explicitly so that falsy stored values such as
		// "0" or "" are not reported as missing.
		if($ch===false)
		{
			die("找不到词条 $en");
		}
		return $ch;
	}

	/**
	 * Replaces the translation of a word that is already cached.
	 *
	 * @param string $en English word (filtered before use).
	 * @param mixed  $ch new translation.
	 */
	public function replaceWord($en,$ch)
	{
		$en=$this->requireExisting($en);
		$this->mem->replace($en, $ch, MEMCACHE_COMPRESSED, time()+31*24*3600) or die("替换词条$en失败");
	}

	/**
	 * Removes a word from the cache; dies when the deletion fails.
	 *
	 * @param string $en English word (filtered before use).
	 */
	public function deleteWord($en)
	{
		$en=$this->filterWord($en);
		// The second (timeout) argument of Memcache::delete() is deprecated
		// and must not be passed.
		$this->mem->delete($en) or die("删除词条$en失败");
	}

	/**
	 * Reduces raw user input to a cache key.
	 *
	 * - Input containing Chinese characters, whitespace or punctuation is
	 *   reduced to its first run of ASCII letters, lower-cased and cut to
	 *   MAX_WORD_LENGTH bytes; NO_WORD is returned when there is none.
	 * - Other input longer than MAX_WORD_LENGTH bytes is cut and lower-cased.
	 * - Anything else is returned trimmed but otherwise unchanged.
	 *   NOTE(review): this last case keeps the original letter case, unlike
	 *   the other branches - confirm whether that is intended.
	 *
	 * @param string $en raw input.
	 * @return string the key to use for cache operations.
	 */
	public function filterWord($en)
	{
		$en=trim($en);

		if(preg_match('#[\x{4e00}-\x{9fa5},\)\.\(]+#u', $en))
		{
			// Mixed Chinese/English input: only a stand-alone English word counts.
			return $this->firstEnglishWord('#\b[a-z]+\b#i', $en);
		}

		if(preg_match('#\s+#', $en)
			|| preg_match('#[—_\+\?\*\^\$\#\%\&\/\\,\.!@=\`\'\"\"""]#', $en))
		{
			// Phrases and input with punctuation: take the first run of letters.
			return $this->firstEnglishWord('#[a-z]+#i', $en);
		}

		if(strlen($en)>self::MAX_WORD_LENGTH)
		{
			return strtolower(substr($en, 0, self::MAX_WORD_LENGTH));
		}

		return $en;
	}

	/**
	 * Empties the whole cache.
	 */
	public function flushAll()
	{
		$this->mem->flush();
	}

	/**
	 * Returns the current Unix time in seconds with sub-second precision.
	 *
	 * The former check for a function called "micro_time" could never
	 * succeed, so only whole seconds were ever returned.
	 *
	 * @return float
	 */
	public function getTime()
	{
		return microtime(true);
	}

	/**
	 * Filters the word and makes sure it is present in the cache.
	 *
	 * @param string $en raw English word.
	 * @return string the filtered key (never the cached value).
	 */
	private function requireExisting($en)
	{
		$en=$this->filterWord($en);
		if($this->mem->get($en)===false)
		{
			die("找不到词条 $en");
		}
		return $en;
	}

	/**
	 * Extracts the first match of $pattern from $text as a lower-case key of
	 * at most MAX_WORD_LENGTH bytes, or NO_WORD when nothing matches.
	 *
	 * @param string $pattern PCRE pattern selecting a run of letters.
	 * @param string $text    trimmed user input.
	 * @return string
	 */
	private function firstEnglishWord($pattern, $text)
	{
		if(!preg_match($pattern, $text, $hit))
		{
			return self::NO_WORD;
		}
		return strtolower(substr($hit[0], 0, self::MAX_WORD_LENGTH));
	}
}

//$mem=new MemStore();
//$mem->addWord('ciba.txt');
//$mem->flushAll();
//$mem->replaceWord('abandon', 100000000);
//$mem->deleteWord('abandon');
//echo $mem->getWord('_*&^%abandon^%$#');
//echo "ok"; 

?>


【解析库】

<?php 	
/**
 * Line-oriented parser for a plain-text dictionary file.
 *
 * From every non-blank line it takes the first run of word characters as the
 * English headword and the text starting at the first Chinese character as
 * the translation.
 */
class Word
{
	private $query_en='#\w+\b#i';
	private $query_ch='#[\x{4e00}-\x{9fa5}][\x{4e00}-\x{9fa5},\)\.\( \w]*#u';
	private $arr_word=array();
	private $fp=null;

	/**
	 * Opens the dictionary file for reading; dies when it cannot be opened.
	 *
	 * @param string $fileName path of the dictionary file.
	 */
	public function __construct($fileName)
	{
		$this->fp=fopen($fileName,'r') or die('打开ciba失败');
	}

	/**
	 * Reads the file to its end and collects headword => translation pairs.
	 * A headword that occurs more than once keeps its last translation.
	 */
	public function readWord()
	{
		// Loop on the result of fgets() itself: it returns false at end of
		// file and on a read error, whereas a feof()-driven loop can spin
		// forever when reading fails.
		while(($line=fgets($this->fp))!==false)
		{
			$line=trim($line);
			if($line=='') continue;

			$en=$this->parseEn($line);
			$ch=$this->parseCh($line);

			// Skip lines that lack a headword or a translation rather than
			// storing them under an empty key or with a null value.
			if($en===null || $ch===null) continue;

			$this->arr_word[$en]=$ch;
		}
	}

	/**
	 * Extracts the English headword from a dictionary line.
	 *
	 * @param string $word one line of the dictionary (not modified).
	 * @return string|null the headword, or null (after printing a notice)
	 *                     when the line contains no word characters.
	 */
	public function parseEn(&$word)
	{
		if(preg_match($this->query_en, $word, $en))
		{
			return $en[0];
		}
		echo "match english word failed<br />";
		return null;
	}

	/**
	 * Extracts the Chinese translation from a dictionary line.
	 *
	 * @param string $word one line of the dictionary (not modified).
	 * @return string|null the translation, or null (after printing a notice)
	 *                     when the line contains no Chinese text.
	 */
	public function parseCh(&$word)
	{
		if(preg_match($this->query_ch, $word, $ch))
		{
			return $ch[0];
		}
		echo "match chinese failed<br />";
		return null;
	}

	/**
	 * @return array headword => translation pairs collected by readWord().
	 */
	public function getWord()
	{
		return $this->arr_word;
	}

	public function __destruct()
	{
		// Only close a handle that is actually open.
		if(is_resource($this->fp))
		{
			fclose($this->fp);
		}
	}
}

//$word=new Word('ciba.txt');
//$word->readWord();
//echo "<pre>";
//print_r($word->getWord());
//echo "</pre>";  */

?>
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: