您的位置:首页 > 数据库 > Redis

城市查询-汉字转拼音、拼音、全拼、简拼、混拼、卷舌音、前后鼻音、兼容查询C#与nodejs+redis应用---笔记

2016-08-29 13:25 961 查看
原文地址:http://blog.csdn.net/gzy11/article/details/52351697
第一步:用C#简单实现拼音的全拼简拼卷舌音,前后鼻音兼容等功能。并建立redis所需查询索引等。
只是简单实现了该功能,对性能无优化。前提条件:城市拼音手动建立好。如:北京 全拼  BeiJing  区分大小写,简拼BJ 。简拼可有可无,没有的话,自己截取出来即可。
关于全拼的获取,有两种方式:一种是源数据中已包含拼音,另一种是通过汉字转换得到拼音。后一种方式借助微软的
Microsoft Visual Studio International Pack 实现。该组件有两个版本,这里用到的是 1.0 版(2.0 版是 1.0 版的增强版),需要引用 ChnCharInfo.dll。
Microsoft Visual Studio International Feature Pack 2.0 
Microsoft Visual Studio International Pack 1.0 
代码如下:
版本:1.0.0.14
using Microsoft.International.Converters.PinYinConverter;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace XXXXXX.Toolkits.Common
{
public class SpellIndexHelper
{
/// <summary>
/// Fuzzy initials: each row is { short form, long form } for the Z/Zh, C/Ch and S/Sh pairs.
/// Only upper-case forms are listed.
/// </summary>
private static readonly string[,] initialsList = new string[3, 2] { { "Z", "Zh" }, { "C", "Ch" }, { "S", "Sh" } };

/// <summary>
/// Fuzzy finals: each row is { short form, long form } for an/ang, en/eng and in/ing,
/// plus the capitalised An/Ang and En/Eng pairs.
/// </summary>
private static readonly string[,] finalsList = new string[5, 2] { { "an", "ang" }, { "en", "eng" }, { "in", "ing" }, { "An", "Ang" }, { "En", "Eng" } };

/// <summary>
/// Initials followed by finals: the three rows of <c>initialsList</c> and then the five rows of
/// <c>finalsList</c>, in that order. The tables are never reassigned, hence <c>readonly</c>.
/// </summary>
private static readonly string[,] spellList = new string[8, 2] { { "Z", "Zh" }, { "C", "Ch" }, { "S", "Sh" }, { "an", "ang" }, { "en", "eng" }, { "in", "ing" }, { "An", "Ang" }, { "En", "Eng" } };

#region 汉字转化拼音

/// <summary>
/// Converts the Chinese characters of a string to pinyin, one capitalised syllable per character.
/// Characters outside the range 0x4E00-0x9FBB are skipped, and the first entry of
/// <c>ChineseChar.Pinyins</c> is used for each converted character.
/// </summary>
/// <param name="chinese">Text to convert; null or empty yields an empty string.</param>
/// <param name="isShortSpell">
/// When true only the upper-cased first letter of every syllable is kept;
/// when false (the default) the full syllable is returned.
/// </param>
/// <returns>The concatenated pinyin, or an empty string when nothing could be converted.</returns>
public static string GetSpell(string chinese, bool isShortSpell = false)
{
    if (string.IsNullOrEmpty(chinese))
    {
        return string.Empty;
    }

    StringBuilder result = new StringBuilder();
    foreach (char spellChar in chinese)
    {
        // Only characters inside this range are treated as Chinese.
        if (spellChar >= 0x4e00 && spellChar <= 0x9fbb)
        {
            ChineseChar chineseChar = new ChineseChar(spellChar);
            // Forward isShortSpell: the original accepted the flag but never passed it on.
            result.Append(Capitalize(chineseChar.Pinyins[0].ToString(), isShortSpell));
        }
    }
    return result.ToString();
}
/// <summary>
/// Turns one raw pinyin syllable into capitalised form: first letter upper-case, the rest lower-case.
/// A trailing digit is removed (presumably the tone number that ChineseChar.Pinyins appends,
/// e.g. "BEI3" - confirm against the library). A syllable without a trailing digit is kept whole.
/// </summary>
/// <param name="spell">Raw syllable; null or empty yields an empty string.</param>
/// <param name="isShortSpell">When true only the upper-cased first letter is returned.</param>
/// <returns>The capitalised syllable, or just its initial letter.</returns>
private static string Capitalize(string spell, bool isShortSpell = false)
{
    if (string.IsNullOrEmpty(spell))
    {
        return string.Empty;
    }

    // Invariant casing keeps "I" -> "i" stable regardless of the current culture (e.g. tr-TR).
    string initial = spell.Substring(0, 1).ToUpperInvariant();
    if (isShortSpell)
    {
        return initial;
    }

    int end = spell.Length;
    if (end > 1 && char.IsDigit(spell[end - 1]))
    {
        end--; // strip the trailing digit only when there is one
    }
    return initial + spell.Substring(1, end - 1).ToLowerInvariant();
}

#endregion

#region 包含函数

/// <summary>
/// Checks whether <paramref name="value"/> begins with <paramref name="query"/>, ignoring case.
/// Despite the "match/contains" wording this is a prefix test: only the first
/// <c>query.Length</c> characters of <paramref name="value"/> are considered.
/// </summary>
/// <param name="value">Candidate name.</param>
/// <param name="query">Text typed by the user.</param>
/// <returns>
/// True when value starts with query; false when either argument is null, empty or
/// white space, or when query is longer than value.
/// </returns>
public static bool IsChineseMatch(string value, string query)
{
    if (string.IsNullOrWhiteSpace(query) || string.IsNullOrWhiteSpace(value))
    {
        return false;
    }

    // Ordinal comparison avoids culture-dependent lower-casing of the inputs.
    return value.StartsWith(query, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Pinyin match: tries a direct comparison first and, if that fails, compares the
/// fuzzy-expanded forms of both strings.
/// </summary>
/// <param name="value">Candidate pinyin.</param>
/// <param name="query">Pinyin typed by the user.</param>
/// <returns>True when either comparison succeeds; false when an argument is null or blank.</returns>
public static bool IsSpellMatch(string value, string query)
{
    bool hasBoth = !string.IsNullOrWhiteSpace(query) && !string.IsNullOrWhiteSpace(value);
    return hasBoth && (IsSpellContains(value, query) || IsSpellAppendContains(value, query));
}

/// <summary>
/// Direct comparison: true when <paramref name="value"/> begins with <paramref name="query"/>,
/// ignoring case. Only the first <c>query.Length</c> characters of value are considered.
/// </summary>
/// <param name="value">Candidate pinyin.</param>
/// <param name="query">Pinyin typed by the user.</param>
/// <returns>True on a case-insensitive prefix match; false for null arguments or a longer query.</returns>
private static bool IsSpellContains(string value, string query)
{
    if (value == null || query == null)
    {
        return false;
    }

    // Ordinal comparison avoids culture-dependent lower-casing of the inputs.
    return value.StartsWith(query, StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// Fuzzy comparison: expands both strings with <c>Append(..., true)</c> and then checks
/// whether the expanded value begins with the expanded query.
/// </summary>
/// <param name="value">Candidate pinyin.</param>
/// <param name="query">Pinyin typed by the user.</param>
/// <returns>True when the expanded forms match.</returns>
private static bool IsSpellAppendContains(string value, string query)
{
    string expandedValue = Append(value, true).ToLower();
    string expandedQuery = Append(query, true).ToLower();
    return expandedQuery.Length <= expandedValue.Length
        && IsSpellContains(expandedValue, expandedQuery);
}
#endregion

/// <summary>
/// Expands a pinyin string to its longest fuzzy form while keeping the original casing
/// (e.g. BeiJin becomes BeiJing). Delegates to the two-argument overload.
/// </summary>
/// <param name="spell">Pinyin string to expand.</param>
/// <returns>The expanded pinyin string.</returns>
public static string Append(string spell)
{
    return Append(spell, isLower: false);
}

/// <summary>
/// Expands a pinyin string to its longest fuzzy form (Z to Zh, an to ang, ...), optionally
/// lower-casing it first (e.g. BeiJin becomes beijing).
/// </summary>
/// <param name="spell">Pinyin string to expand; null or empty yields an empty string.</param>
/// <param name="isLower">When true the input and every rule are lower-cased before replacing.</param>
/// <returns>The expanded string with the doubled "hh" / "gg" the expansion creates collapsed.</returns>
public static string Append(string spell, bool isLower)
{
    if (string.IsNullOrEmpty(spell))
    {
        return string.Empty;
    }

    if (isLower)
    {
        spell = spell.ToLowerInvariant();
    }

    // In lower-case mode the "An"/"En" rules fold onto "an"/"en". Applying a rule twice
    // turns "ang" into "anggg", which the single "gg" collapse below cannot undo
    // ("shanghai" came out as "shangghai"), so every distinct rule runs only once.
    HashSet<string> applied = new HashSet<string>();
    int ruleCount = spellList.GetLength(0);
    for (int i = 0; i < ruleCount; i++)
    {
        string shortForm = isLower ? spellList[i, 0].ToLowerInvariant() : spellList[i, 0];
        string longForm = isLower ? spellList[i, 1].ToLowerInvariant() : spellList[i, 1];
        if (applied.Add(shortForm))
        {
            spell = spell.Replace(shortForm, longForm);
        }
    }

    // Input that was already in long form has now been doubled (Zh -> Zhh, ang -> angg).
    spell = spell.Replace("hh", "h");
    spell = spell.Replace("gg", "g");
    return spell;
}

/// <summary>
/// Expands the fuzzy initials only: every short form in <c>initialsList</c> is replaced by its
/// long form, then a doubled "hh" is collapsed to "h".
/// </summary>
/// <param name="spell">Pinyin string to expand.</param>
/// <returns>The string with initials expanded.</returns>
public static string AppendInitials(string spell)
{
    int row = 0;
    while (row < 3)
    {
        string shortForm = initialsList[row, 0];
        string longForm = initialsList[row, 1];
        spell = spell.Replace(shortForm, longForm);
        row++;
    }
    return spell.Replace("hh", "h");
}

/// <summary>
/// Expands the fuzzy finals only: every short form in <c>finalsList</c> is replaced by its
/// long form, then a doubled "gg" is collapsed to "g".
/// </summary>
/// <param name="spell">Pinyin string to expand.</param>
/// <returns>The string with finals expanded.</returns>
public static string AppendFinals(string spell)
{
    string expanded = spell;
    for (int row = 0; row != 5; ++row)
    {
        expanded = expanded.Replace(finalsList[row, 0], finalsList[row, 1]);
    }
    expanded = expanded.Replace("gg", "g");
    return expanded;
}

/// <summary>
/// Reduces a pinyin string to its shortest fuzzy form by replacing every long form in
/// <c>spellList</c> with its short form (e.g. beijing becomes beijin).
/// </summary>
/// <param name="spell">Pinyin string to reduce.</param>
/// <returns>The reduced string.</returns>
public static string Remove(string spell)
{
    return Enumerable.Range(0, 8)
        .Aggregate(spell, (current, row) => current.Replace(spellList[row, 1], spellList[row, 0]));
}

/// <summary>
/// Reduces the fuzzy initials only: every long form in <c>initialsList</c> is replaced by its
/// short form.
/// </summary>
/// <param name="spell">Pinyin string to reduce.</param>
/// <returns>The string with initials reduced.</returns>
public static string RemoveInitials(string spell)
{
    return Enumerable.Range(0, 3)
        .Aggregate(spell, (current, row) => current.Replace(initialsList[row, 1], initialsList[row, 0]));
}

/// <summary>
/// Reduces the fuzzy finals only: every long form in <c>finalsList</c> is replaced by its
/// short form.
/// </summary>
/// <param name="spell">Pinyin string to reduce.</param>
/// <returns>The string with finals reduced.</returns>
public static string RemoveFinals(string spell)
{
    return Enumerable.Range(0, 5)
        .Aggregate(spell, (current, row) => current.Replace(finalsList[row, 1], finalsList[row, 0]));
}

/// <summary>
/// Splits a capitalised pinyin string into syllables at every upper-case letter
/// (e.g. BeiJing becomes Bei, Jing). Characters that are neither upper- nor lower-case
/// letters are dropped.
/// </summary>
/// <param name="spell">Capitalised pinyin string.</param>
/// <returns>
/// The syllables in order, or null when <paramref name="spell"/> is null, empty or white space
/// (null is kept for backward compatibility with existing callers).
/// </returns>
public static List<string> SplitSpell(string spell)
{
    if (string.IsNullOrWhiteSpace(spell))
    {
        return null;
    }

    List<string> syllables = new List<string>();
    StringBuilder current = new StringBuilder();
    foreach (char c in spell)
    {
        if (char.IsUpper(c))
        {
            // An upper-case letter closes the pending syllable and starts a new one.
            if (current.Length > 0)
            {
                syllables.Add(current.ToString());
                current.Clear();
            }
            current.Append(c);
        }
        else if (char.IsLower(c))
        {
            current.Append(c);
        }
    }

    // Flush after the loop: the original only flushed when the very last character was a
    // letter, so "BeiJing2" lost its final syllable.
    if (current.Length > 0)
    {
        syllables.Add(current.ToString());
    }
    return syllables;
}

/// <summary>
/// Builds the mixed-spelling index of a name: for every syllable it collects the alternatives
/// (initial letter, full syllable, fuzzy-expanded and fuzzy-reduced forms) and emits every
/// combination across syllables, joined with ';'.
/// </summary>
/// <param name="shortSpell">
/// Upper-case initials, one per syllable (e.g. BJ). May be null or blank, in which case the
/// short alternatives are left out.
/// </param>
/// <param name="spell">Capitalised full pinyin (e.g. BeiJing).</param>
/// <returns>
/// The distinct combinations joined with ';', or an empty string when
/// <paramref name="spell"/> contains no syllables.
/// </returns>
public static string CreateHybridIndex(string shortSpell, string spell)
{
    List<string> fullSyllables = SplitSpell(spell);
    if (fullSyllables == null || fullSyllables.Count == 0)
    {
        return string.Empty;
    }

    // Outer list: one entry per spelling variant; inner list: that variant's syllables.
    List<List<string>> variants = new List<List<string>>();
    if (!string.IsNullOrWhiteSpace(shortSpell))
    {
        variants.Add(SplitSpell(shortSpell));                 // short spelling as given
        variants.Add(SplitSpell(AppendInitials(shortSpell))); // short spelling, initials expanded
    }
    variants.Add(fullSyllables);                              // full spelling as given
    variants.Add(SplitSpell(AppendInitials(spell)));          // initials expanded
    variants.Add(SplitSpell(Append(spell)));                  // initials and finals expanded
    variants.Add(SplitSpell(AppendFinals(spell)));            // finals expanded
    variants.Add(SplitSpell(RemoveInitials(spell)));          // initials reduced
    variants.Add(SplitSpell(RemoveFinals(spell)));            // finals reduced
    variants.Add(SplitSpell(Remove(spell)));                  // initials and finals reduced

    // The transposition below needs every variant to have the same number of syllables;
    // a variant that does not line up with the full spelling is left out instead of crashing.
    int syllableCount = fullSyllables.Count;
    variants = variants.Where(v => v != null && v.Count == syllableCount).ToList();

    // Transpose: one list of alternatives per syllable position.
    List<List<string>> columns = Reverse(variants);

    // Start from the first syllable's alternatives so a single-syllable name also yields a
    // result (the original left the result null and then threw), then fold in the rest.
    List<string> resultList = columns[0].Distinct().ToList();
    for (int i = 1; i < columns.Count; i++)
    {
        resultList = GetCombination(resultList, columns[i]);
    }
    return string.Join(";", resultList);
}

/// <summary>
/// Transposes a list of rows into a list of columns: element i of the result holds the i-th
/// item of every input row. The number of columns is taken from the first row.
/// </summary>
/// <param name="list">Rows to transpose; every row needs at least as many items as the first row.</param>
/// <returns>The columns, or an empty list when <paramref name="list"/> is null or empty.</returns>
private static List<List<string>> Reverse(List<List<string>> list)
{
    List<List<string>> resultList = new List<List<string>>();
    if (list == null || list.Count == 0)
    {
        return resultList; // nothing to transpose; the original threw on list[0]
    }

    int length = list[0].Count;
    for (int i = 0; i < length; i++)
    {
        List<string> column = new List<string>(list.Count);
        foreach (List<string> row in list)
        {
            column.Add(row[i]);
        }
        resultList.Add(column);
    }
    return resultList;
}

/// <summary>
/// Concatenates every item of <paramref name="first"/> with every item of
/// <paramref name="last"/> and returns the distinct results in first-seen order.
/// </summary>
/// <param name="first">Left-hand parts.</param>
/// <param name="last">Right-hand parts.</param>
/// <returns>The distinct concatenations.</returns>
private static List<string> GetCombination(List<string> first, List<string> last)
{
    List<string> combined = new List<string>(first.Count * last.Count);
    HashSet<string> seen = new HashSet<string>();
    foreach (string head in first)
    {
        foreach (string tail in last)
        {
            string joined = head + tail;
            if (seen.Add(joined))
            {
                combined.Add(joined);
            }
        }
    }
    return combined;
}

/// <summary>
/// Joins the items with ';' (no trailing separator).
/// </summary>
/// <param name="list">Items to join.</param>
/// <returns>The joined string, or an empty string when <paramref name="list"/> is null or empty.</returns>
private static string GetCombinationToString(List<string> list)
{
    // The original built the string by hand and threw NullReferenceException for an
    // empty or null list.
    if (list == null || list.Count == 0)
    {
        return string.Empty;
    }
    return string.Join(";", list);
}

/// <summary>
/// Removes duplicate and empty entries from a ';'-separated list, keeping first-seen order.
/// </summary>
/// <param name="hybridSpell">Entries separated by ';'.</param>
/// <returns>The distinct entries joined with ';', or an empty string when none remain.</returns>
private static string Distinct(string hybridSpell)
{
    // The original threw NullReferenceException when the input was null or held no entries.
    if (string.IsNullOrEmpty(hybridSpell))
    {
        return string.Empty;
    }

    IEnumerable<string> entries = hybridSpell
        .Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries)
        .Distinct();
    return string.Join(";", entries);
}
}
}

C#调用
    [TestMethod]
        public void GetSpell()
        {
            //string str = "2017-12-12 12:12:12";
            string result = SpellIndexHelper.GetSpell("456北京"); //获取汉字拼音
        }
        [TestMethod]
        public void CreateHybridIndex()
        {
            string shortSpell="BJ";
            string spell = SpellIndexHelper.GetSpell("456北京");
            SpellIndexHelper.CreateHybridIndex(shortSpell,spell);//建立索引
        }
         [TestMethod]
        public void IsSpellMatch()
        {
            //string str = "2017-12-12 12:12:12";
            SpellIndexHelper.IsSpellMatch("jinan", "jin");//C#中简单 卷舌音 前后鼻音 查询
        }

效果如下:





生成混拼索引后存入SQL数据库中,效果如下:



第二步:用nodejs实现查询功能。
nodejs 的异步回调很容易踩坑:从 redis 取数据时,必须等所有回调都返回之后才能输出结果,处理起来比较麻烦。
nodejs 代码如下:
var dbHelper = require('../../WebApi/MSSQL/dbHelper.js');
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;

//var idd=1;
/*dbHelper.select("dbo.[InsuranceRegion]",1,"where id=@id",{id: idd},"order by id",function(err,result){
console.log(result);
});*/
//dbHelper.select("dbo.InsuranceRegion","","","","order by RegionId",function(err,result){
//console.log(result);
//});
var redis = require("redis");
var express = require("express");
// Express application that serves the query endpoints.
var app = express();
var isSuccess = false;
// Single shared Redis connection (port 6379 on 192.168.151.87).
var client = redis.createClient('6379', '192.168.151.87');
// Log Redis connection errors.
client.on("error", function (redisError) {
    console.log(redisError);
});
// Maps null/undefined to an empty string; every other value is returned unchanged.
var convertNullToEmptyStr = function (data) {
    if (data == null) {
        return "";
    }
    return data;
};
// Presence flag: 0 when the value is null/undefined, 1 for anything else (including false and 0).
var convertToBool = function (data) {
    var missing = (data === null || data === undefined);
    return missing ? 0 : 1;
};
/*
var json = "{'name':'aa','age':18}";
var jsonobj = eval('('+json+")");
for (var key in jsonobj) {
console.log(key)
}*/
// Every region joined with its insurer-mapping row; the mapping columns (YGBX ... ZABX) are
// null for regions without a mapping because of the left join.
var sql ="SELECT  [InsuranceRegion].[RegionId]  ,[CityID]    ,[Name]    ,[Spell]    ,[FullName]    ,[Level]    ,[OrderNumber] ,[ShorSpell],[HotFlag] ,[HotOrderNumber],[LicensePlateCode] ,[SpellAdapter] ,[SpellIndex],[YGBX]    ,[CCIC]    ,[AXATP]    ,[PICC]    ,[CPIC]    ,[PAIC]    ,[ZABX] FROM [Finance].[dbo].[InsuranceRegion] left  join [InsuranceRegionMapping] on [InsuranceRegion].RegionId =[InsuranceRegionMapping].GBCode";
// Copy every region into a Redis hash and build the per-level sets and query indexes.
dbHelper.querySql(sql, "", function (err, result) {
    if (err) {
        // Nothing can be indexed without the rows; report instead of silently doing nothing.
        console.log(err);
        return;
    }
    for (var i in result) {
        var row = result[i];
        client.hmset(
            "baoxian:Region:RegionId:" + row.RegionId,
            "RegionId", row.RegionId,
            "CityID", convertNullToEmptyStr(row.CityID),
            "Name", convertNullToEmptyStr(row.Name),
            "Spell", convertNullToEmptyStr(row.Spell),
            "FullName", convertNullToEmptyStr(row.FullName),
            "Level", convertNullToEmptyStr(row.Level),
            "OrderNumber", convertNullToEmptyStr(row.OrderNumber),
            "ShorSpell", convertNullToEmptyStr(row.ShorSpell),
            "HotFlag", convertNullToEmptyStr(row.HotFlag),
            "HotOrderNumber", convertNullToEmptyStr(row.HotOrderNumber),
            "LicensePlateCode", convertNullToEmptyStr(row.LicensePlateCode),
            "SpellIndex", convertNullToEmptyStr(row.SpellIndex),
            "SpellAdapter", convertNullToEmptyStr(row.SpellAdapter),
            "YGBX", convertToBool(row.YGBX),
            "CCIC", convertToBool(row.CCIC),
            "AXATP", convertToBool(row.AXATP),
            "PICC", convertToBool(row.PICC),
            "CPIC", convertToBool(row.CPIC),
            // PAIC and ZABX were both filled from the CPIC column (copy-paste slip).
            "PAIC", convertToBool(row.PAIC),
            "ZABX", convertToBool(row.ZABX)
        );
        if (row.Level == 2) { // city: add to the city set and build its query indexes
            client.sadd(["baoxian:Region:Level:2", row.RegionId], function (err, reply) {
                //console.log(reply); // 1
            });
            createQueryIndex(row.Name, row.RegionId);       // Chinese short-name index
            createQueryIndex(row.FullName, row.RegionId);   // Chinese full-name index
            createQuerySpell(row.SpellIndex, row.RegionId); // mixed-pinyin index
        }
        else if (row.Level == 1) { // province
            client.sadd("baoxian:Region:Level:1", row.RegionId);
        }
        else if (row.Level == 3) { // county/district: grouped under the first four digits of its id plus "00"
            client.sadd("baoxian:Region:County:CityID:" + row.RegionId.toString().substring(0, 4) + "00", row.RegionId);
        }
    }
});

// Builds the query index for every entry of a ';'-separated spelling list.
// data: the SpellIndex column value; regionId: the region the entries belong to.
var createQuerySpell = function (data, regionId) {
    console.log(data);
    // SpellIndex can be null for rows without a generated index; the original crashed on split.
    if (data == null || data === "") {
        return;
    }
    var entries = String(data).split(";");
    for (var i = 0; i < entries.length; i++) {
        if (entries[i] != "") {
            createQueryIndex(entries[i], regionId);
        }
    }
};
// Builds the prefix index for one search term: the region id is added to the set
// "baoxian:Region:Query:<prefix>" for every lower-cased prefix of the term.
var createQueryIndex = function (data, regionId) {
    // Name/FullName may be null in the source rows; the original crashed on data.length.
    if (data == null) {
        return;
    }
    var term = String(data).toLowerCase();
    for (var end = 1; end <= term.length; end++) {
        client.sadd(["baoxian:Region:Query:" + term.substring(0, end), regionId], function (err, reply) {
            if (err) {
                console.log(err);
            }
        });
    }
};
// Builds the hot-city set for the mobile site from a fixed list of region ids.
var createMHotCityIndex = function () {
    var hotCityIds = [
        110100, 310100, 440100, 440300, 320100, 320500, 330100, 370100, 370200, 420100, 430100,
        210100, 510100, 500100, 610100, 340100, 350100, 220100, 130100, 410100, 120100, 210200
    ];
    hotCityIds.forEach(function (regionId) {
        client.sadd(["baoxian:Region:MHotCity", regionId], function (err, reply) {
            //console.log(reply); // 1
        });
    });
};
createMHotCityIndex();
// Adds every hot city (Level=2, HotFlag=1) to the "baoxian:Region:HotCity" set.
// NOTE(review): the query orders by HotOrderNumber, but a Redis set keeps no order.
dbHelper.select("dbo.InsuranceRegion", "", "where Level=2 and HotFlag=1 ", "", "order by [HotOrderNumber]", function (err, result) {
    var addHotCity = function (row) {
        client.sadd(["baoxian:Region:HotCity", row.RegionId], function (err, reply) {
            //console.log(reply); // 1
        });
    };
    for (var idx in result) {
        addHotCity(result[idx]);
    }
});
if (cluster.isMaster) {
// Fork one worker per CPU core.
var forked = 0;
while (forked < numCPUs) {
    cluster.fork();
    forked++;
}

// Report worker exits.
cluster.on('exit', function (worker) {
    console.log('worker ' + worker.process.pid + ' died');
});
// Report worker starts.
cluster.on('fork', function (worker) {
    console.log('worker ' + worker.process.pid + ' is online');
});
} else {
// GET /CityQuery?key=<prefix>&jsoncallback=<fn>
// Looks up the region ids indexed under the lower-cased key and responds with their hashes.
// The payload is {Result, Message, Data}, JSONP-wrapped when jsoncallback is supplied.
app.get('/CityQuery', function (req, res) {
    res.writeHead(200, {
        "Content-Type": "text/plain;charset=utf-8"
    });
    var data = {
        Result: true,
        Message: null,
        Data: null
    };
    var jsonp = req.query.jsoncallback;
    var finished = false;
    // Writes the payload exactly once; plain JSON when no callback name was given
    // (the original emitted "undefined(...)").
    var send = function () {
        if (finished) {
            return;
        }
        finished = true;
        var body = JSON.stringify(data);
        res.end(jsonp ? jsonp + "(" + body + ")" : body);
    };
    var fail = function (message) {
        data.Result = false;
        data.Message = message;
        send();
    };
    try {
        var key = req.query.key;
        console.log(key);
        // Validate before decoding: decodeURIComponent(undefined) yields the string
        // "undefined", which made the original check unreachable.
        if (key === undefined || key === null || key === "") {
            return fail("查询参数为空!");
        }
        key = decodeURIComponent(key).toLowerCase();
        client.smembers('baoxian:Region:Query:' + key, function (err, reply) {
            if (err || !reply) {
                console.log(err);
                return fail("查询失败!");
            }
            var len = reply.length;
            console.log(len); console.log(reply);
            if (len == 0) {
                data.Message = "没有匹配项!";
                return send();
            }
            var queryResult = new Array(len);
            var pending = len;
            reply.forEach(function (regionId, index) {
                client.hgetall("baoxian:Region:RegionId:" + regionId, function (hashErr, replyData) {
                    if (hashErr) {
                        console.log(hashErr);
                    }
                    queryResult[index] = replyData;
                    pending--;
                    // Count completions instead of probing the last slot: a missing hash
                    // (null reply) left the original request hanging forever.
                    if (pending === 0) {
                        data.Data = queryResult.filter(function (item) {
                            return item != null;
                        });
                        send();
                    }
                });
            });
        });
    }
    catch (error) {
        console.log("[error]:" + error.name + error.message);
        if (!finished) {
            finished = true;
            res.end(error.name + error.message);
        }
    }
});
// GET /HotCity?jsoncallback=<fn>
// Responds with the hashes of every region in the "baoxian:Region:HotCity" set.
// The payload is {Result, Message, Data}, JSONP-wrapped when jsoncallback is supplied.
app.get('/HotCity', function (req, res) {
    // Headers go out first; the original wrote them after the try/catch, i.e. after the
    // catch branch might already have ended the response.
    res.writeHead(200, {
        "Content-Type": "text/plain;charset=utf-8"
    });
    var data = {
        Result: true,
        Message: null,
        Data: null
    };
    var jsonp = req.query.jsoncallback;
    console.log(jsonp);
    var finished = false;
    // Writes the payload exactly once; plain JSON when no callback name was given.
    var send = function () {
        if (finished) {
            return;
        }
        finished = true;
        var body = JSON.stringify(data);
        res.end(jsonp ? jsonp + "(" + body + ")" : body);
    };
    try {
        client.smembers('baoxian:Region:HotCity', function (err, reply) {
            if (err || !reply) {
                console.log(err);
                data.Result = false;
                data.Message = "查询失败!";
                return send();
            }
            var len = reply.length;
            if (len == 0) {
                // The original never answered when the set was empty.
                data.Data = [];
                return send();
            }
            var queryResult = new Array(len);
            var pending = len;
            reply.forEach(function (regionId, index) {
                client.hgetall("baoxian:Region:RegionId:" + regionId, function (hashErr, replyData) {
                    if (hashErr) {
                        console.log(hashErr);
                    }
                    queryResult[index] = replyData;
                    pending--;
                    // Respond once every lookup has returned, even if some hashes are missing.
                    if (pending === 0) {
                        data.Data = queryResult.filter(function (item) {
                            return item != null;
                        });
                        send();
                    }
                });
            });
        });
    }
    catch (error) {
        console.log("[error]:" + error.name + error.message);
        if (!finished) {
            finished = true;
            res.end(error.name + error.message);
        }
    }
});
// GET /HotMCity?jsoncallback=<fn>
// The original handler was empty, so every request hung until the client timed out.
// NOTE(review): presumably this endpoint should serve the "baoxian:Region:MHotCity" set that
// createMHotCityIndex fills; it is implemented that way here, in the same response format
// as /HotCity - confirm the intended contract.
app.get('/HotMCity', function (req, res) {
    res.writeHead(200, {
        "Content-Type": "text/plain;charset=utf-8"
    });
    var data = {
        Result: true,
        Message: null,
        Data: null
    };
    var jsonp = req.query.jsoncallback;
    var finished = false;
    // Writes the payload exactly once; plain JSON when no callback name was given.
    var send = function () {
        if (finished) {
            return;
        }
        finished = true;
        var body = JSON.stringify(data);
        res.end(jsonp ? jsonp + "(" + body + ")" : body);
    };
    try {
        client.smembers('baoxian:Region:MHotCity', function (err, reply) {
            if (err || !reply) {
                console.log(err);
                data.Result = false;
                data.Message = "查询失败!";
                return send();
            }
            var len = reply.length;
            if (len == 0) {
                data.Data = [];
                return send();
            }
            var queryResult = new Array(len);
            var pending = len;
            reply.forEach(function (regionId, index) {
                client.hgetall("baoxian:Region:RegionId:" + regionId, function (hashErr, replyData) {
                    if (hashErr) {
                        console.log(hashErr);
                    }
                    queryResult[index] = replyData;
                    pending--;
                    // Respond once every lookup has returned.
                    if (pending === 0) {
                        data.Data = queryResult.filter(function (item) {
                            return item != null;
                        });
                        send();
                    }
                });
            });
        });
    }
    catch (error) {
        console.log("[error]:" + error.name + error.message);
        if (!finished) {
            finished = true;
            res.end(error.name + error.message);
        }
    }
});
// Each worker process listens on port 9800.
app.listen(9800);
}

nodejs中MSSQL查询,参考:http://blog.csdn.net/gzy11/article/details/52117793


nodejs查询效果如下:

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: