您的位置:首页 > 其它

获取国家统计局行政区划表

2009-11-05 08:51 197 查看
一、国家统计局网站上有最新、最全的全国行政区划表。不过从网页源代码来看,靠,我都怀疑它是不是直接用电子表格或文字处理软件导出的,还“国家”统计局呢。
http://www.stats.gov.cn/tjbz/xzqhdm/t20090626_402568086.htm
http://www.stats.gov.cn/tjbz/xzqhdm/index.htm
http://www.stats.gov.cn/tjbz/index.htm





二、用程序把它抓取下来

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Net;


/// <summary>
/// Downloads the administrative-division code table published on the National Bureau of
/// Statistics of China website and converts it into <see cref="Regional"/> objects that are
/// linked to their parent and child divisions.
/// </summary>
/// <remarks>
/// The processing pipeline is: <see cref="GetPageHtml"/> → <see cref="GetRegionalHtml"/> →
/// <see cref="GetRegionalLines"/> → <see cref="GetRegional"/> → <see cref="SetRelationRegional"/>.
/// </remarks>
public class StatsGovCnRegional
{
    /// <summary>
    /// Address of the page that contains the division table.
    /// </summary>
    public const string WebUrl = "http://www.stats.gov.cn/tjbz/xzqhdm/t20090626_402568086.htm";

    /// <summary>
    /// Regular expression that isolates the division table: capture group 1 is everything from
    /// the first &lt;P&gt; to the last &lt;/P&gt; inside &lt;span class="content"&gt;.
    /// <c>\u0022</c> is the regex escape for a double quote and <c>[\w\W]</c> matches any
    /// character, including line breaks.
    /// </summary>
    public const string RegionalBlockTag = @"<span class=\u0022content\u0022>(<P>[\w\W]*</P>)</span>";

    // Characters stripped from both ends of a division name: the spaces and the "(*)" marker
    // (ASCII or full-width parentheses) that the page attaches to some entries.
    private static readonly char[] NameTrimChars =
        new char[] { ' ', '\u00A0', '\u3000', '(', ')', '\uFF08', '\uFF09', '*' };

    /// <summary>
    /// Step 1: downloads the complete HTML text of <see cref="WebUrl"/>.
    /// </summary>
    /// <param name="info">
    /// On success, the page HTML; on failure, an error message followed by the exception text.
    /// </param>
    /// <returns><c>true</c> when the page was downloaded, <c>false</c> when any exception occurred.</returns>
    public static bool GetPageHtml(out string info)
    {
        try
        {
            // "using" guarantees the client, stream and reader are released even when the
            // download or the read throws.
            using (WebClient client = new WebClient())
            {
                client.Headers.Add("user-agent",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");

                // NOTE(review): Encoding.Default depends on the machine/runtime; presumably the
                // page is served in a Chinese ANSI code page (GB2312/GBK) - confirm before
                // running on a machine whose default encoding differs.
                using (Stream data = client.OpenRead(WebUrl))
                using (StreamReader reader = new StreamReader(data, Encoding.Default))
                {
                    info = reader.ReadToEnd();
                }
            }

            return true;
        }
        catch (Exception exception)
        {
            // Deliberately broad: the caller only needs a success flag plus a readable message.
            info = "发生错误:\n" + exception;
            return false;
        }
    }

    /// <summary>
    /// Step 2: extracts the division table from the page HTML using
    /// <see cref="RegionalBlockTag"/> (case-insensitive).
    /// </summary>
    /// <param name="pageHtml">Full page HTML as returned by <see cref="GetPageHtml"/>.</param>
    /// <returns>
    /// The text of capture group 1 (a run of &lt;P&gt;...&lt;/P&gt; paragraphs), or an empty string
    /// when <paramref name="pageHtml"/> is null/empty or the pattern does not match.
    /// </returns>
    public static string GetRegionalHtml(string pageHtml)
    {
        if (string.IsNullOrEmpty(pageHtml))
        {
            return string.Empty;
        }

        Match match = Regex.Match(pageHtml, RegionalBlockTag, RegexOptions.IgnoreCase);

        // An unsuccessful match yields an empty group, so no Success check is required.
        return match.Groups[1].Value;
    }

    /// <summary>
    /// Step 3: splits the table HTML on paragraph and line-break tags into one string per
    /// division entry (a province, a city or a county).
    /// </summary>
    /// <param name="regionalHtml">Table HTML as returned by <see cref="GetRegionalHtml"/>.</param>
    /// <returns>
    /// The non-empty, trimmed entries in document order; an empty list for null/empty input.
    /// </returns>
    public List<string> GetRegionalLines(string regionalHtml)
    {
        List<string> lines = new List<string>();
        if (string.IsNullOrEmpty(regionalHtml))
        {
            return lines;
        }

        // 1. Drop line breaks so that an entry wrapped across source lines stays in one piece.
        string text = regionalHtml.Replace("\r", "").Replace("\n", "");

        // 2. Drop every kind of space: the HTML entity, no-break space, ideographic space and
        //    the plain ASCII space.
        text = text.Replace("&nbsp;", "")
                   .Replace("\u00A0", "")
                   .Replace("\u3000", "")
                   .Replace(" ", "");

        // 3. Split on <P>, </P>, <BR> and <BR/> in any letter case.
        foreach (string piece in Regex.Split(text, @"</?p>|<br/?>", RegexOptions.IgnoreCase))
        {
            string line = piece.Trim();
            if (line.Length > 0)
            {
                lines.Add(line);
            }
        }

        return lines;
    }

    /// <summary>
    /// Step 4: converts every entry that starts with a six-digit code into a
    /// <see cref="Regional"/>; entries that do not (headers, footnotes) are skipped.
    /// </summary>
    /// <param name="regionalLines">Entries as returned by <see cref="GetRegionalLines"/>.</param>
    /// <returns>
    /// One <see cref="Regional"/> per recognised entry with <c>MyCode</c>, <c>MyName</c> and
    /// <c>ParentCode</c> filled in; an empty list for null input.
    /// </returns>
    public List<Regional> GetRegional(List<string> regionalLines)
    {
        List<Regional> regionalList = new List<Regional>();
        if (regionalLines == null)
        {
            return regionalList;
        }

        foreach (string rawLine in regionalLines)
        {
            if (rawLine == null)
            {
                continue;
            }

            string line = rawLine.Trim();
            if (line.Length < 6)
            {
                continue;
            }

            string code = line.Substring(0, 6);
            if (!Regex.IsMatch(code, @"^[0-9]{6}$"))
            {
                continue;
            }

            Regional regional = new Regional();
            regional.MyCode = code;
            regional.MyName = CleanName(line.Substring(6));
            regional.ParentCode = GetParentCode(code);
            regionalList.Add(regional);
        }

        return regionalList;
    }

    /// <summary>
    /// Step 5: links every division to its parent (<c>ParentRegional</c>) and to its direct
    /// children (<c>SonRegionals</c>) by matching <c>ParentCode</c> against <c>MyCode</c>.
    /// </summary>
    /// <param name="regionalList">
    /// Divisions to link; the items are modified in place. A null list is ignored.
    /// </param>
    public void SetRelationRegional(List<Regional> regionalList)
    {
        if (regionalList == null)
        {
            return;
        }

        // Index the list once instead of scanning it twice per element.
        // First entry seen for each code (same result as List.Find, which returns the first match).
        Dictionary<string, Regional> firstByCode = new Dictionary<string, Regional>();
        // Entries grouped by the parent code they declare, in list order (same as List.FindAll).
        Dictionary<string, List<Regional>> childrenByParentCode = new Dictionary<string, List<Regional>>();
        // Dictionary keys cannot be null, so entries with a null ParentCode are kept separately.
        List<Regional> childrenWithNullParentCode = new List<Regional>();

        foreach (Regional regional in regionalList)
        {
            if (regional.MyCode != null && !firstByCode.ContainsKey(regional.MyCode))
            {
                firstByCode.Add(regional.MyCode, regional);
            }

            if (regional.ParentCode == null)
            {
                childrenWithNullParentCode.Add(regional);
                continue;
            }

            List<Regional> siblings;
            if (!childrenByParentCode.TryGetValue(regional.ParentCode, out siblings))
            {
                siblings = new List<Regional>();
                childrenByParentCode.Add(regional.ParentCode, siblings);
            }
            siblings.Add(regional);
        }

        foreach (Regional regional in regionalList)
        {
            // Parent: a null/empty ParentCode marks a root entry; an unknown code also yields null.
            Regional parent = null;
            if (!string.IsNullOrEmpty(regional.ParentCode))
            {
                firstByCode.TryGetValue(regional.ParentCode, out parent);
            }
            regional.ParentRegional = parent;

            // Children: every entry whose ParentCode equals this entry's code.
            List<Regional> children;
            if (regional.MyCode == null)
            {
                children = childrenWithNullParentCode;
            }
            else
            {
                childrenByParentCode.TryGetValue(regional.MyCode, out children);
            }

            // Copy so that each division owns an independent list instance.
            regional.SonRegionals = children == null
                ? new List<Regional>()
                : new List<Regional>(children);
        }
    }

    // Removes surrounding whitespace and the "(*)" marker, then every embedded space, from the
    // text that follows the six-digit code.
    private static string CleanName(string rawName)
    {
        return rawName.Trim()
                      .Trim(NameTrimChars)
                      .Replace(" ", "")
                      .Replace("\u00A0", "")
                      .Replace("\u3000", "");
    }

    // Derives the parent code from a six-digit code laid out as province(2) + city(2) + county(2):
    // "" when both the city and county parts are "00", the province code for a city-level entry,
    // and province + city + "00" for a county-level entry.
    private static string GetParentCode(string code)
    {
        string province = code.Substring(0, 2);
        string city = code.Substring(2, 2);
        string county = code.Substring(4, 2);

        if (county == "00" && city == "00")
        {
            return "";
        }

        if (county == "00")
        {
            return province + "0000";
        }

        return province + city + "00";
    }

    // Manual smoke test: runs the whole pipeline against the live page and prints every
    // division with its parent and children to the console.
    private void TestSetRelationRegional()
    {
        string pageHtml;
        if (!GetPageHtml(out pageHtml))
        {
            // On failure pageHtml carries the error text produced by GetPageHtml.
            Console.Error.WriteLine(pageHtml);
            return;
        }

        string regionalHtml = GetRegionalHtml(pageHtml);
        List<string> regionalLines = GetRegionalLines(regionalHtml);

        List<Regional> list = GetRegional(regionalLines);
        SetRelationRegional(list);

        foreach (Regional regional in list)
        {
            Console.WriteLine(
                string.Format("{0}\t{1}\t{2}", regional.MyCode, regional.MyName, regional.ParentCode));
            Console.Write(string.Format("父:{0}\n",
                regional.ParentRegional == null ? "根" : regional.ParentRegional.MyName));
            Console.Write("子:");
            foreach (Regional sonRegional in regional.SonRegionals)
            {
                Console.Write(sonRegional.MyName + ",");
            }
            Console.WriteLine("\n------------------------------------------------------------------------------------------");
        }
    }
}


/// <summary>
/// Model of one administrative division (region) together with its links to the parent and
/// child divisions.
/// </summary>
public class Regional
{
    /// <summary>
    /// Code of this division.
    /// </summary>
    public string MyCode { get; set; }

    /// <summary>
    /// Name of this division.
    /// </summary>
    public string MyName { get; set; }

    /// <summary>
    /// Code of the parent division.
    /// </summary>
    public string ParentCode { get; set; }

    /// <summary>
    /// All child divisions. The getter never returns null: an empty list is created on first
    /// access, and again on the next access after null has been assigned.
    /// </summary>
    public List<Regional> SonRegionals
    {
        get
        {
            if (_sonregionals == null)
            {
                _sonregionals = new List<Regional>();
            }
            return _sonregionals;
        }
        set { _sonregionals = value; }
    }
    private List<Regional> _sonregionals = null;

    /// <summary>
    /// Parent division object.
    /// </summary>
    public Regional ParentRegional { get; set; }
}


附件下载:http://download.csdn.net/source/1793658
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: