您的位置:首页 > 其它

.NET 2.0 抓取 Web 网页数据分析

2006-11-22 09:16 615 查看
效果图



后台代码

以下为引用的内容:

using System;

using System.Data;

using System.Configuration;

using System.Web;

using System.Web.Security;

using System.Web.UI;

using System.Web.UI.WebControls;

using System.Web.UI.WebControls.WebParts;

using System.Web.UI.HtmlControls;

using System.Text.RegularExpressions;

using System.Net;

using System.IO;

using System.Collections;

/// <summary>
/// Code-behind for Default.aspx. Downloads the page whose URL is typed into
/// TextBox1 and lists each distinct http:// link found in its source in TextBox2.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    // Absolute http:// URL: dot-separated host labels followed by an optional path/query.
    private const string LinkPattern = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";

    /// <summary>
    /// Page load handler. Nothing needs initialising, on first load or on postback.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Click handler for the extract button: fetches the URL from TextBox1 and
    /// writes every distinct link found in the response, one per line, to TextBox2.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    /// <exception cref="UriFormatException">The entered text is not a valid URI.</exception>
    /// <exception cref="InvalidCastException">The URI scheme is not http/https.</exception>
    /// <exception cref="WebException">The request failed.</exception>
    protected void Button1_Click(object sender, EventArgs e)
    {
        TextBox2.Text = "";

        string web_url = this.TextBox1.Text.Trim();//"http://blog.csdn.net/21aspnet/"
        if (web_url.Length == 0)
        {
            // Nothing to fetch; leave the result box empty.
            return;
        }

        string all_code = DownloadPage(web_url);
        ArrayList links = ExtractDistinctLinks(all_code);

        // Build the output once instead of re-assigning TextBox2.Text per link.
        System.Text.StringBuilder output = new System.Text.StringBuilder();
        foreach (string link in links)
        {
            output.Append(link).Append("\n");
        }

        TextBox2.Text = output.ToString();
    }

    /// <summary>
    /// Downloads the body of <paramref name="url"/> as text, disposing the
    /// response and reader even when reading fails.
    /// </summary>
    private static string DownloadPage(string url)
    {
        // The cast deliberately rejects non-HTTP schemes such as file://.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Returns the links matched by <see cref="LinkPattern"/> in <paramref name="html"/>,
    /// in order of first appearance, with exact (case-sensitive) duplicates removed.
    /// </summary>
    private static ArrayList ExtractDistinctLinks(string html)
    {
        ArrayList links = new ArrayList();
        Hashtable seen = new Hashtable();
        Regex re = new Regex(LinkPattern, RegexOptions.IgnoreCase);

        foreach (Match match in re.Matches(html))
        {
            string link = match.Value;

            // Record each link the first time it is seen so later repeats are filtered out.
            if (!seen.ContainsKey(link))
            {
                seen.Add(link, null);
                links.Add(link);
            }
        }

        return links;
    }
}

Chinaz.com

前台

以下为引用的内容:

<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" >

<head runat="server">

<title>抓取网页所有链接</title>

</head>

<body >

<form id="form1" runat="server">

<div>

<%-- TextBox1: URL input. Button1: posts back and invokes Button1_Click in the code-behind. --%>
<asp:TextBox ID="TextBox1" runat="server" Width="481px"></asp:TextBox>

<asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="提取" />

<br />

<%-- TextBox2: multi-line box whose Text is set by Button1_Click. --%>
<asp:TextBox ID="TextBox2" runat="server" Height="304px" TextMode="MultiLine" Width="524px"></asp:TextBox></div>

</form>

</body>

</html>

Chinaz.com
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: