您的位置:首页 > 编程语言 > C#

简单的C#爬虫

2017-10-26 14:34 218 查看
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace _2015._5._23通过WebClient类发起请求并下载html
{
class Program
{
static void Main(string[] args)
{
#region 抓取网页邮箱
//string url = "http://zhidao.baidu.com/link?url=cvF0de2o9gkmk3zW2jY23TLEUs6wX-79E1DQVZG7qaBhEVT_xlh6TO7p0W4qwuAZ_InLymC_-mJBBcpdbzTeq_";
//WebClient wc = new WebClient();
//wc.Encoding = Encoding.UTF8;
//string str = wc.DownloadString(url);
//MatchCollection matchs=  Regex.Matches(str,@"\w+@([-\w])+([\.\w])+",RegexOptions.ECMAScript);
//foreach (Match item in matchs)
//{
//    Console.WriteLine(item.Value);
//}
//Console.WriteLine(matchs.Count);
#endregion

#region 抓取网页图片

//WebClient wc = new WebClient();
//wc.Encoding = Encoding.UTF8;
////下载源网页代码
//string html = wc.DownloadString("http://dongxi.douban.com/?dcs=top-nav&dcm=douban");
//MatchCollection matches= Regex.Matches(html,"<img.*src=\"(.+?)\".*>");
//foreach (Match item in matches)
//{
//    //下载图片到指定路径
//    wc.DownloadFile(item.Groups[1].Value,@"c:\mv\"+Path.GetFileName(item.Groups[1].Value));
//}
//Console.WriteLine(matches.Count);

#endregion 爬一些信息

WebClient wc = new WebClient();
wc.Encoding = Encoding.UTF8;
string html = wc.DownloadString("http://www.lagou.com/");

MatchCollection matches= Regex.Matches(html,"<a.*jobs.*>(.*)</a>");
foreach (Match item in matches)
{
Console.WriteLine(item.Groups[1].Value);
}
Console.WriteLine(matches.Count);
Console.ReadKey();
}
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: