您的位置:首页 > 编程语言 > C#

C# 语言:输入关键字,抓取你想要的所有网址

2015-07-04 13:48 651 查看
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using mshtml;
using System.Collections;
using System.Threading;

namespace 遍历百度网页
{
/// <summary>
/// Parameterless, void-returning callback type. Form1 wraps its own
/// <c>baidu111</c> method in this delegate and passes it to <c>Invoke</c>
/// when <c>webBrowser1.InvokeRequired</c> is true.
/// </summary>
public delegate void baidu111();
/// <summary>
/// Main form. Searches Baidu for the keyword typed into <c>textBox1</c>, walks
/// up to <see cref="MaxPages"/> result pages inside <c>webBrowser1</c>, and
/// collects the text found in result-URL elements so it can be listed in
/// <c>listView1</c>.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Number of result pages scraped per search.</summary>
    private const int MaxPages = 3;

    /// <summary>Inner text of the "next page" link on a Baidu result page.</summary>
    private const string NextPageText = "下一页>";

    /// <summary>URLs collected during the current search, in discovery order.</summary>
    private readonly List<string> _collectedUrls = new List<string>();

    /// <summary>How many result pages of the current search have been scraped.</summary>
    private int _pagesScraped;

    /// <summary>Set by the stop button; suppresses scraping and paging until the next search.</summary>
    private bool _stopRequested;

    /// <summary>Tracks whether the list view columns were already created by this form.</summary>
    private bool _columnsCreated;

    public Form1()
    {
        InitializeComponent();
    }

    private void Form1_Load(object sender, EventArgs e)
    {
        listView1.GridLines = true;
    }

    /// <summary>Starts a search for the current keyword.</summary>
    private void baidu()
    {
        baidu111();
    }

    /// <summary>
    /// Navigates the embedded browser to the Baidu result page for the keyword
    /// in <c>textBox1</c>. May be called from any thread: when invoked off the
    /// UI thread it re-dispatches itself through <c>Invoke</c>.
    /// </summary>
    private void baidu111()
    {
        if (webBrowser1.InvokeRequired)
        {
            Invoke(new baidu111(baidu111));
            return;
        }

        string keyword = textBox1.Text.Trim();
        if (keyword.Length == 0)
        {
            // Nothing to search for.
            return;
        }

        // Escape the keyword so characters such as '&', '#', '+' or spaces
        // cannot corrupt the query string.
        webBrowser1.Navigate("http://www.baidu.com/s?wd=" + Uri.EscapeDataString(keyword));
    }

    /// <summary>
    /// Stop button: cancels any pending navigation and prevents further pages
    /// from being scraped until a new search is started.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        _stopRequested = true;
        webBrowser1.Stop();
    }

    /// <summary>
    /// Search button: resets the per-search state and starts a new search.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        // The click handler already runs on the UI thread, so the search is
        // started directly. Starting a worker thread that immediately invokes
        // back onto the UI thread would only cause a second, redundant
        // navigation.
        _stopRequested = false;
        _pagesScraped = 0;       // otherwise a second search would scrape nothing
        _collectedUrls.Clear();  // each search starts from a clean result list

        baidu();
    }

    /// <summary>
    /// Simulates a click on the first "next page" link of the currently loaded
    /// document. Does nothing when no document is loaded or no such link exists.
    /// </summary>
    public void bianli()
    {
        if (webBrowser1.Document == null)
        {
            return;
        }

        IHTMLDocument2 doc = webBrowser1.Document.DomDocument as IHTMLDocument2;
        if (doc == null)
        {
            return;
        }

        foreach (IHTMLElement ele in doc.all)
        {
            if (ele.innerText == NextPageText)
            {
                ele.click();

                // Stop right after the first click: the click starts a
                // navigation, so continuing to walk (and possibly click again
                // in) the old DOM would be wrong.
                return;
            }
        }
    }

    /// <summary>
    /// Fires when a document finishes loading. Scrapes the page once the
    /// top-level document (not an inner frame) has completed.
    /// </summary>
    /// <param name="sender">The web browser control.</param>
    /// <param name="e">Event data carrying the URL of the completed document.</param>
    private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        if (_stopRequested || _pagesScraped >= MaxPages)
        {
            return;
        }

        HtmlDocument loaded = webBrowser1.Document;
        if (loaded == null)
        {
            return;
        }

        // The event also fires for frames; only react when the completed URL
        // is the URL of the main document.
        if (e.Url == loaded.Url)
        {
            bianliwangye();
        }
    }

    /// <summary>
    /// Collects the URLs shown on the currently loaded result page and, unless
    /// the page limit has been reached, moves on to the next result page.
    /// </summary>
    public void bianliwangye()
    {
        if (webBrowser1.Document == null)
        {
            return;
        }

        IHTMLDocument2 document = webBrowser1.Document.DomDocument as IHTMLDocument2;
        if (document == null)
        {
            return;
        }

        foreach (IHTMLElement h in document.all)
        {
            // Result URLs are identified by the element's CSS class name.
            if (h.className != "g" && h.className != "c-showurl")
            {
                continue;
            }

            string html = h.innerHTML;
            if (string.IsNullOrEmpty(html))
            {
                continue;
            }

            // The URL is the text before the first separator; whatever follows
            // it inside the element is discarded.
            int separatorIndex = html.IndexOf(" ", StringComparison.Ordinal);
            if (separatorIndex > 0)
            {
                _collectedUrls.Add(html.Substring(0, separatorIndex));
            }
        }

        _pagesScraped++;

        // Only request the next page when it will actually be scraped.
        if (_pagesScraped < MaxPages)
        {
            bianli();
        }
    }

    /// <summary>
    /// Show-results button: lists every collected URL, numbered from 1, in
    /// <c>listView1</c>. Safe to click repeatedly.
    /// </summary>
    private void button3_Click(object sender, EventArgs e)
    {
        if (!_columnsCreated)
        {
            listView1.Columns.Add("编号");
            listView1.Columns.Add("获取到的网址", 400);
            _columnsCreated = true;
        }

        listView1.BeginUpdate();
        try
        {
            // Rebuild the rows from scratch so repeated clicks do not
            // duplicate rows or attach sub-items to stale ones.
            listView1.Items.Clear();

            for (int index = 0; index < _collectedUrls.Count; index++)
            {
                string[] cells = { (index + 1).ToString(), _collectedUrls[index] };
                listView1.Items.Add(new ListViewItem(cells));
            }
        }
        finally
        {
            listView1.EndUpdate();
        }
    }
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: