您的位置：首页 > 编程语言 > C#

C#实现简单的网页爬虫

2015-05-23 14:02 309 查看

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace _2015._5._23通过WebClient类发起请求并下载html
{
    /// <summary>
    /// Minimal web-crawler demo: downloads a page with <see cref="WebClient"/> and
    /// extracts pieces of it with regular expressions.
    /// </summary>
    class Program
    {
        /// <summary>Page that is downloaded and scanned for job links.</summary>
        private const string JobSiteUrl = "http://www.lagou.com/";

        /// <summary>
        /// Matches a single anchor element whose opening tag contains "jobs" and
        /// captures its inner text in group 1.
        /// <c>[^>]*</c> keeps the attribute part inside one tag, and the lazy
        /// <c>(.*?)</c> stops at the first closing tag, so several anchors on the
        /// same line produce one match each instead of one oversized match.
        /// </summary>
        private static readonly Regex JobLinkPattern =
            new Regex(@"<a\s[^>]*jobs[^>]*>(.*?)</a>");

        /// <summary>
        /// Downloads the job site's home page, prints the inner text of every anchor
        /// whose tag mentions "jobs", then prints the number of matches and waits
        /// for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            #region Example: scrape e-mail addresses from a page
            //string url = "http://zhidao.baidu.com/link?url=cvF0de2o9gkmk3zW2jY23TLEUs6wX-79E1DQVZG7qaBhEVT_xlh6TO7p0W4qwuAZ_InLymC_-mJBBcpdbzTeq_";
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //string str = wc.DownloadString(url);
            //MatchCollection matchs=  Regex.Matches(str,@"\w+@([-\w])+([\.\w])+",RegexOptions.ECMAScript);
            //foreach (Match item in matchs)
            //{
            //    Console.WriteLine(item.Value);
            //}
            //Console.WriteLine(matchs.Count);
            #endregion

            #region Example: scrape images from a page

            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            ////Download the page's HTML source
            //string html = wc.DownloadString("http://dongxi.douban.com/?dcs=top-nav&dcm=douban");
            //MatchCollection matches= Regex.Matches(html,"<img.*src=\"(.+?)\".*>");
            //foreach (Match item in matches)
            //{
            //    //Download each image into the target directory
            //    wc.DownloadFile(item.Groups[1].Value,@"c:\mv\"+Path.GetFileName(item.Groups[1].Value));
            //}
            //Console.WriteLine(matches.Count);

            #endregion

            #region Scrape job link texts

            string html;
            // WebClient is IDisposable; release it as soon as the download is done.
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.UTF8;
                html = wc.DownloadString(JobSiteUrl);
            }

            MatchCollection matches = JobLinkPattern.Matches(html);
            foreach (Match item in matches)
            {
                // Group 1 is the text between the opening and closing anchor tags.
                Console.WriteLine(item.Groups[1].Value);
            }
            Console.WriteLine(matches.Count);

            // Keep the console window open until the user presses a key.
            Console.ReadKey();

            #endregion
        }
    }
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航