C#网络爬虫
2013-09-10 12:16
183 查看
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace WindowsFormsApplication2
{
/// <summary>
/// Minimal crawler form: downloads one fixed page when the form loads and,
/// when the button is clicked, lists every href target found in that page.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Address of the page that is downloaded on load.</summary>
    private const string PageUrl = "http://www.hao123.cn";

    /// <summary>Name of the text encoding used to decode the response body.</summary>
    private const string PageEncodingName = "gb2312";

    /// <summary>
    /// Matches a quoted href attribute (all-lowercase or all-uppercase name).
    /// Built once instead of on every click.
    /// </summary>
    private static readonly Regex HrefPattern =
        new Regex(@"(href|HREF)[ ]*=[ ]*[""'][^""'#>]+[""']");

    /// <summary>
    /// Text of the downloaded page, or null when the download failed or the
    /// response was not text/html.
    /// </summary>
    public string strHtml;

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Downloads the page as soon as the form loads.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        // NOTE(review): this is a blocking network call made from the Load handler.
        strHtml = GetPage();
    }

    /// <summary>
    /// Download a page
    /// </summary>
    /// <returns>
    /// The data downloaded from the page with every line terminated by "\r\n",
    /// or null when the request fails, reading fails, or the content type is
    /// not text/html.
    /// </returns>
    private string GetPage()
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(PageUrl);
            using (WebResponse response = request.GetResponse())
            {
                // Ordinal, case-insensitive comparison: a MIME type is not linguistic text.
                if (!response.ContentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
                {
                    return null;
                }

                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(PageEncodingName)))
                {
                    // StringBuilder avoids re-copying the whole page for every line read.
                    StringBuilder buffer = new StringBuilder();
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        buffer.Append(line).Append("\r\n");
                    }
                    return buffer.ToString();
                }
            }
        }
        catch (WebException)
        {
            // Best effort: a failed request is reported to the caller as null.
            return null;
        }
        catch (IOException)
        {
            // Best effort: a failed read is reported to the caller as null.
            return null;
        }
    }

    /// <summary>
    /// Extracts every href target from the downloaded page and appends them,
    /// one per line, to richTextBox1.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        // GetPage returns null on failure; Regex.Matches would throw on a null input.
        if (strHtml == null)
        {
            MessageBox.Show("The page could not be downloaded.");
            return;
        }

        StringBuilder links = new StringBuilder();
        foreach (Match match in HrefPattern.Matches(strHtml))
        {
            string attribute = match.Value;
            // Keep only what follows the first '=' and strip quotes and stray delimiters.
            string target = attribute.Substring(attribute.IndexOf('=') + 1).Trim('"', '\'', '#', ' ', '>');
            links.Append(target).Append("\r\n");
        }

        // One append for the whole list instead of one control update per link.
        if (links.Length > 0)
        {
            richTextBox1.AppendText(links.ToString());
        }
        MessageBox.Show("OK");
    }
}
}
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace WindowsFormsApplication2
{
/// <summary>
/// Minimal crawler form: downloads one fixed page when the form loads and,
/// when the button is clicked, lists every href target found in that page.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Address of the page that is downloaded on load.</summary>
    private const string PageUrl = "http://www.hao123.cn";

    /// <summary>Name of the text encoding used to decode the response body.</summary>
    private const string PageEncodingName = "gb2312";

    /// <summary>
    /// Matches a quoted href attribute (all-lowercase or all-uppercase name).
    /// Built once instead of on every click.
    /// </summary>
    private static readonly Regex HrefPattern =
        new Regex(@"(href|HREF)[ ]*=[ ]*[""'][^""'#>]+[""']");

    /// <summary>
    /// Text of the downloaded page, or null when the download failed or the
    /// response was not text/html.
    /// </summary>
    public string strHtml;

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Downloads the page as soon as the form loads.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        // NOTE(review): this is a blocking network call made from the Load handler.
        strHtml = GetPage();
    }

    /// <summary>
    /// Download a page
    /// </summary>
    /// <returns>
    /// The data downloaded from the page with every line terminated by "\r\n",
    /// or null when the request fails, reading fails, or the content type is
    /// not text/html.
    /// </returns>
    private string GetPage()
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(PageUrl);
            using (WebResponse response = request.GetResponse())
            {
                // Ordinal, case-insensitive comparison: a MIME type is not linguistic text.
                if (!response.ContentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
                {
                    return null;
                }

                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(PageEncodingName)))
                {
                    // StringBuilder avoids re-copying the whole page for every line read.
                    StringBuilder buffer = new StringBuilder();
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        buffer.Append(line).Append("\r\n");
                    }
                    return buffer.ToString();
                }
            }
        }
        catch (WebException)
        {
            // Best effort: a failed request is reported to the caller as null.
            return null;
        }
        catch (IOException)
        {
            // Best effort: a failed read is reported to the caller as null.
            return null;
        }
    }

    /// <summary>
    /// Extracts every href target from the downloaded page and appends them,
    /// one per line, to richTextBox1.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        // GetPage returns null on failure; Regex.Matches would throw on a null input.
        if (strHtml == null)
        {
            MessageBox.Show("The page could not be downloaded.");
            return;
        }

        StringBuilder links = new StringBuilder();
        foreach (Match match in HrefPattern.Matches(strHtml))
        {
            string attribute = match.Value;
            // Keep only what follows the first '=' and strip quotes and stray delimiters.
            string target = attribute.Substring(attribute.IndexOf('=') + 1).Trim('"', '\'', '#', ' ', '>');
            links.Append(target).Append("\r\n");
        }

        // One append for the whole list instead of one control update per link.
        if (links.Length > 0)
        {
            richTextBox1.AppendText(links.ToString());
        }
        MessageBox.Show("OK");
    }
}
}
相关文章推荐
- C#制作多线程处理强化版网络爬虫
- 适合C# Actor的消息执行方式(5):一个简单的网络爬虫
- 用C#开发蜘蛛网络爬虫采集程序(附源码)(一)
- 发布一款基于C#的网络爬虫程序
- C#实现网络爬虫
- 【C#】 知乎用户网络爬虫
- 用C#实现网络爬虫(一)
- 用C#实现网络爬虫
- C#使用多线程实现网络爬虫,并且通过网络传输,传到另外的服务器数据库存储
- 简易网络爬虫程序的开发(2)(c#版)
- C# 网络爬虫利器之Html Agility Pack如何快速实现解析Html
- 用C#实现网络爬虫(二)
- 适合C# Actor的消息执行方式(5):一个简单的网络爬虫
- C#开发技术期末大作业-网络爬虫
- 用C#开发蜘蛛网络爬虫采集程序(附源码)(二)
- 用C#实现网络爬虫
- C#网络爬虫抓取小说
- c#宽度优先的网络爬虫
- 发布一款基于C#的网络爬虫程序
- c#网络爬虫一资料