您的位置:首页 > 编程语言 > Go语言

自己写的抓取Google赞助商链接的页面源码

2007-02-02 18:56 621 查看
/// <summary>
/// Downloads the page at the URL formed from <c>TextBoxGatherURL.Text</c> plus the
/// URL-encoded <c>TextBoxAdWord.Text</c>, extracts the sponsored-link table from the
/// returned HTML with regular expressions, and binds the resulting list
/// (position, title, absolute link) to <c>GridViewAdList</c>.
/// </summary>
/// <remarks>
/// The response is decoded as GB2312. Titles are lower-cased before extraction, so the
/// "Title" column always contains lower-case text. Exceptions thrown while opening or
/// reading the URL are not caught here and propagate to the caller.
/// </remarks>
private void GetInfo()
{
    string StrGatherURL = TextBoxGatherURL.Text + HttpUtility.UrlEncode(TextBoxAdWord.Text);

    // Scheme + host (+ port) of the request; the scraped "/url..." links are relative to it.
    // Parsing with Uri avoids the index arithmetic that broke on URLs without a path
    // slash or without a scheme. If the text is not an absolute URL the base stays empty.
    string StrGatherBaseURL = string.Empty;
    System.Uri GatherUri;
    if (System.Uri.TryCreate(StrGatherURL, System.UriKind.Absolute, out GatherUri))
    {
        StrGatherBaseURL = GatherUri.GetLeftPart(System.UriPartial.Authority);
    }

    // Dispose the client, the response stream and the reader even if reading throws.
    string StrFullInfo;
    using (WebClient InfoWclient = new WebClient())
    using (Stream InfoStream = InfoWclient.OpenRead(StrGatherURL))
    using (StreamReader InfoStreamReader = new StreamReader(InfoStream, Encoding.GetEncoding("GB2312")))
    {
        StrFullInfo = InfoStreamReader.ReadToEnd();
    }

    // The ad table ends differently depending on whether the page shows a
    // "more sponsored links" footer, so pick the closing pattern accordingly.
    Regex AdListInfoRegex;
    if (StrFullInfo.Contains("更多赞助商链接"))
    {
        AdListInfoRegex = new Regex("<table cellspacing=0 cellpadding=0 width=25% align=right bgcolor=#ffffff border=0>.*更多赞助商链接 »</a></font></td></tr></table>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    }
    else
    {
        AdListInfoRegex = new Regex("<table cellspacing=0 cellpadding=0 width=25% align=right bgcolor=#ffffff border=0>.*<font size=-1></font></td></tr></table>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    }

    // Empty string when the table is not found; the loops below then produce no rows.
    string StrAdList = AdListInfoRegex.Match(StrFullInfo).ToString();

    Regex AdLinkTitleRegex = new Regex("<font size=.?0.*</font></a>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    Regex AdLinkSrcRegex = new Regex("/url.*target=nw", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // '.' does not cross newlines, so line breaks are inserted after each link / title
    // to keep the greedy ".*" from swallowing several entries in one match.
    MatchCollection AdLinkSrcMatchCollection = AdLinkSrcRegex.Matches(StrAdList.Replace("target=nw>", "target=nw\n"));

    // Invariant lower-casing keeps the ordinal "<br>" / "<font size=+0>" replacements
    // working regardless of the current thread culture.
    MatchCollection AdLinkTitleMatchCollection = AdLinkTitleRegex.Matches(StrAdList.ToLowerInvariant().Replace("<br>", "\n"));

    DataTable AdListDt = new DataTable();
    AdListDt.Columns.Add("Place");
    AdListDt.Columns.Add("Title");
    AdListDt.Columns.Add("Value");

    for (int i = 0; i < AdLinkSrcMatchCollection.Count; i++)
    {
        string StrAdLinkSrc = AdLinkSrcMatchCollection[i].ToString();
        string StrAdListItemValue = StrGatherBaseURL + StrAdLinkSrc.Replace(" target=nw", "");

        // The two regexes are matched independently, so there may be fewer titles than
        // links; fall back to an empty title instead of indexing past the end.
        string StrAdTitle = string.Empty;
        if (i < AdLinkTitleMatchCollection.Count)
        {
            StrAdTitle = AdLinkTitleMatchCollection[i].ToString().Replace("<font size=+0>", "").Replace("</font></a>", "");
        }

        DataRow AdAddRow = AdListDt.NewRow();
        AdAddRow["Place"] = (i + 1).ToString();
        AdAddRow["Title"] = StrAdTitle;
        AdAddRow["Value"] = StrAdListItemValue;
        AdListDt.Rows.Add(AdAddRow);
    }

    // NOTE(review): the column access below assumes the grid has generated its three
    // columns as soon as DataSource is assigned — confirm against the grid's configuration.
    GridViewAdList.DataSource = AdListDt;
    GridViewAdList.Columns[0].HeaderText = "位置";
    GridViewAdList.Columns[0].Width = 40;
    GridViewAdList.Columns[1].HeaderText = "标题";
    GridViewAdList.Columns[1].Width = 200;
    GridViewAdList.Columns[2].HeaderText = "链接";
    GridViewAdList.Columns[2].Visible = false;
}
内容来自用户分享和网络整理，不保证内容的准确性；如有侵权内容，可联系管理员处理。（点击这里给我发消息）
标签: