您的位置:首页 > 其它

解析抓取到的汉字格式时间(将汉字的时间格式转换为数字格式)

2012-08-22 16:06 501 查看
// Earlier, looser pattern, kept for reference:
//Regex regexTime = new Regex(@"(?<time>二[\S]+[\u4e00-\u9fa5]+年[\u4e00-\u9fa5]+月[\u4e00-\u9fa5]+日)");
// Matches a date written in Chinese numerals, e.g. "二〇〇八年五月二十二日".
// The month class now accepts the letter-O zero glyph like the year and day
// classes do (it previously accepted only the ASCII digit 0, which is still
// allowed so that anything matched before keeps matching).
Regex regexTime = new Regex(@"(?<time>二[一二三四五六七八九十〇○O]{3}年[一二三四五六七八九十〇○O0]{1,2}月[一二三四五六七八九十〇○O]{1,3}日)");
try
{
    // Run the pattern once and reuse the result instead of IsMatch followed by Match.
    Match timeMatch = regexTime.Match(TempHTMLContent);
    if (timeMatch.Success)
    {
        string ChineseTime = timeMatch.Groups["time"].Value;
        item.CreateTime = DateTime.Parse(ChineseToNumDateTime(ChineseTime));
    }
}
catch
{
    // Best effort: a date that cannot be converted or parsed is logged as a
    // warning and the item keeps whatever CreateTime it already had.
    CommonFunction.logWirte("抓取匹配时间出错:源是 " + matchList[i].Value, IWOMWebCrawlerDbLayer.Common.LogGrade.Warning);
}


/// <summary>
/// Splits a Chinese-numeral date into its year, month and day fragments
/// (each fragment keeps its trailing 年 / 月 / 日 marker) and concatenates
/// the result of <c>ConvertTime</c> for each fragment, in that order.
/// </summary>
/// <param name="ChineseTime">Date text such as "二〇〇八年五月二十二日".</param>
/// <returns>
/// The concatenated converted fragments; an empty string when the text does
/// not contain the 年…月…日 structure (all groups are then empty).
/// </returns>
private string ChineseToNumDateTime(string ChineseTime)
{
    // One match is enough: the three named groups come from the same result.
    Match dateParts = new Regex(@"(?<Y>[\s\S]+?年)(?<M>[\s\S]+?月)(?<D>[\s\S]+?日)").Match(ChineseTime);

    string yearPart = ConvertTime(dateParts.Groups["Y"].Value);
    string monthPart = ConvertTime(dateParts.Groups["M"].Value);
    string dayPart = ConvertTime(dateParts.Groups["D"].Value);

    return yearPart + monthPart + dayPart;
}
/// <summary>
/// Converts a fragment of a Chinese-numeral date (for example "二〇〇八年",
/// "十月" or "二十二日") into ASCII digits. The 年 and 月 markers become '-'
/// separators; any other unrecognised character (such as 日) is dropped.
/// </summary>
/// <param name="Time">Fragment to convert; null or empty yields an empty string.</param>
/// <returns>The digits (and '-' separators) corresponding to the fragment.</returns>
private string ConvertTime(string Time)
{
    if (string.IsNullOrEmpty(Time))
    {
        return string.Empty;
    }

    // Positions 0-9 are the numerals zero to nine, so the index is the value.
    // The glyphs after them are alternative spellings of zero: the circle ○,
    // the Latin letter O and the ASCII digit 0.
    const string digitGlyphs = "〇一二三四五六七八九○O0";

    string num = string.Empty;
    // True while the previous character belonged to the number being built;
    // used to tell a bare leading 十 (ten) from 十 after a tens digit.
    bool inNumber = false;

    for (int j = 0; j < Time.Length; j++)
    {
        char current = Time[j];

        if (current == '十')
        {
            // "十", "十一": no tens digit was written, so it is an implicit 1.
            if (!inNumber)
            {
                num += "1";
            }

            // "十", "二十": no units digit follows, so the units digit is 0.
            bool digitFollows = j + 1 < Time.Length && digitGlyphs.IndexOf(Time[j + 1]) >= 0;
            if (!digitFollows)
            {
                num += "0";
            }

            inNumber = true;
            continue;
        }

        int glyphIndex = digitGlyphs.IndexOf(current);
        if (glyphIndex >= 0)
        {
            int value = glyphIndex < 10 ? glyphIndex : 0;
            num += value.ToString();
            inNumber = true;
            continue;
        }

        // Any other character ends the current number.
        inNumber = false;
        if (current == '年' || current == '月')
        {
            num += "-";
        }
    }

    return num;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: