您的位置:首页 > 理论基础 > 计算机网络

C#网络爬虫

2013-09-10 12:16 183 查看
using System;

using System.Collections.Generic;

using System.ComponentModel;

using System.Data;

using System.Drawing;

using System.Linq;

using System.Text;

using System.Windows.Forms;

using System.Net;

using System.IO;

using System.Text.RegularExpressions;

namespace WindowsFormsApplication2
{
    /// <summary>
    /// Main form of a minimal web-crawler demo: downloads one page when the
    /// form loads and, on button click, lists every <c>href</c> target found
    /// in that page.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Address of the page downloaded when the form loads.</summary>
        private const string StartUrl = "http://www.hao123.cn";

        /// <summary>
        /// Matches <c>href = "..."</c> / <c>href = '...'</c> attributes. The
        /// <c>url</c> group captures the quoted value, which may not contain
        /// quotes, <c>#</c> or <c>&gt;</c>. HTML attribute names are
        /// case-insensitive, hence <see cref="RegexOptions.IgnoreCase"/>.
        /// </summary>
        private static readonly Regex HrefPattern = new Regex(
            @"href[ ]*=[ ]*[""'](?<url>[^""'#>]+)[""']",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        /// <summary>
        /// Text of the downloaded page, or <c>null</c> when the download
        /// failed or the response was not <c>text/html</c>.
        /// </summary>
        public string strHtml;

        /// <summary>
        /// Creates the form and initializes its designer-generated components.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Downloads the start page as soon as the form is loaded and stores
        /// the result in <see cref="strHtml"/>.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            // NOTE: this is a synchronous network call made from the Load
            // handler, so the form does not finish loading until it returns.
            strHtml = GetPage();
        }

        /// <summary>
        /// Download a page
        /// </summary>
        /// <returns>
        /// The data downloaded from the page with every line terminated by
        /// CR LF, or <c>null</c> if the request failed, the response has no
        /// body, or its content type is not <c>text/html</c>.
        /// </returns>
        private string GetPage()
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(StartUrl);

                using (WebResponse response = request.GetResponse())
                {
                    // Ordinal comparison: MIME types are protocol tokens, not
                    // linguistic text, so culture-specific casing must not apply.
                    string contentType = response.ContentType;
                    if (contentType == null ||
                        !contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
                    {
                        return null;
                    }

                    using (Stream stream = response.GetResponseStream())
                    {
                        if (stream == null)
                        {
                            return null;
                        }

                        // The page is decoded as gb2312 regardless of the
                        // charset announced by the server.
                        using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("gb2312")))
                        {
                            // StringBuilder avoids the quadratic cost of
                            // repeated string concatenation on large pages.
                            StringBuilder buffer = new StringBuilder();
                            string line;
                            while ((line = reader.ReadLine()) != null)
                            {
                                buffer.Append(line).Append("\r\n");
                            }

                            return buffer.ToString();
                        }
                    }
                }
            }
            catch (WebException)
            {
                // Request or protocol failure: report "no page" to the caller.
                return null;
            }
            catch (IOException)
            {
                // Connection dropped while reading the body.
                return null;
            }
        }

        /// <summary>
        /// Extracts every link target from <paramref name="html"/>.
        /// </summary>
        /// <param name="html">HTML text to scan; must not be <c>null</c>.</param>
        /// <returns>
        /// The <c>href</c> values in document order, with surrounding spaces removed.
        /// </returns>
        private static List<string> ExtractLinks(string html)
        {
            List<string> links = new List<string>();
            foreach (Match match in HrefPattern.Matches(html))
            {
                links.Add(match.Groups["url"].Value.Trim(' '));
            }

            return links;
        }

        /// <summary>
        /// Appends every link found in the downloaded page to the rich text
        /// box, one per line, then reports completion.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // GetPage returns null on failure; Regex.Matches would throw on it.
            if (strHtml == null)
            {
                MessageBox.Show("No page content is available.");
                return;
            }

            StringBuilder output = new StringBuilder();
            foreach (string link in ExtractLinks(strHtml))
            {
                output.Append(link).Append("\r\n");
            }

            // A single append instead of one control update per link.
            richTextBox1.AppendText(output.ToString());

            MessageBox.Show("OK");
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性。如有侵权内容,可联系管理员处理(点击这里给我发消息)。
标签: