您的位置:首页 > 理论基础 > 计算机网络

异步网络编程之图片批量抓取下载(C#)

2011-07-24 01:32 531 查看
支持

MSDN上异步网络编程的例子是我0.2版的核心所在

运行环境

.NET Framework 2.0

开发工具

Microsoft Visual Studio 2005

正题

一. 先贴一张图,这个界面就是程序的主界面了:




二. 部分代码说明(主要讲解异步分析和下载):

异步分析下载采取的策略是边分析边下载,即不等数据全部分析完毕,就开始下载已经分析出来的图片链接。下载成功的图片,其链接在List框中会被标记上√;未能下载的图片,可能是链接分析有误,也可能是下载时出现了异常。

1. 异步分析部分代码:

/// <summary>

/// 异步分析下载

/// </summary>

private void AsyncAnalyzeAndDownload(string url, string savePath)
{
    // Entry point of the analyse-and-download pipeline: remembers the target
    // page and the output directory, starts the analysis stopwatch, and kicks
    // off an asynchronous download of the page. AsyncURIAnalyze is invoked
    // when the page data arrives (or the download fails / is cancelled).
    this.uriString = url;
    this.savePath = savePath;

    #region Analysis timing start
    count = 0;
    count1 = 0;
    freq = 0;
    result = 0;
    QueryPerformanceFrequency(ref freq);
    QueryPerformanceCounter(ref count);
    #endregion

    // The client must outlive this method: DownloadDataAsync returns
    // immediately, so wrapping the client in a using-block would dispose it
    // while the download is still in flight. It is disposed from a completion
    // handler instead, and only disposed here if starting the download fails.
    WebClient wClient = new WebClient();
    bool started = false;
    try
    {
        // Passed as the user-state object; AsyncURIAnalyze signals it when
        // the analysis has finished. Nothing waits on it in this method.
        AutoResetEvent waiter = new AutoResetEvent(false);

        wClient.Credentials = CredentialCache.DefaultCredentials;
        wClient.DownloadDataCompleted += new DownloadDataCompletedEventHandler(AsyncURIAnalyze);

        // Registered after AsyncURIAnalyze so it runs once analysis is done.
        wClient.DownloadDataCompleted += delegate(object sender, DownloadDataCompletedEventArgs e)
        {
            ((WebClient)sender).Dispose();
        };

        // new Uri(...) throws UriFormatException for a malformed address;
        // the exception is allowed to propagate to the caller as before.
        wClient.DownloadDataAsync(new Uri(uriString), waiter);
        started = true;
    }
    finally
    {
        if (!started)
        {
            wClient.Dispose();
        }
    }
}

/// <summary>
/// Completion handler for the page download started by
/// AsyncAnalyzeAndDownload. Decodes the page, extracts every image link,
/// shows each new link in the list box and immediately starts an
/// asynchronous download for it (analysis and download overlap).
/// </summary>
/// <param name="sender">The WebClient that raised the event (unused).</param>
/// <param name="e">
/// Download result; <c>e.UserState</c> carries the AutoResetEvent that is
/// signalled when analysis is finished.
/// </param>
protected void AsyncURIAnalyze(Object sender, DownloadDataCompletedEventArgs e)
{
    AutoResetEvent waiter = (AutoResetEvent)e.UserState;
    try
    {
        // A cancelled or failed page download yields nothing to analyse;
        // the finally-block still stops the timer and reports completion.
        if (!e.Cancelled && e.Error == null)
        {
            string dnDir = string.Empty;
            string domainName = string.Empty;
            string uri = uriString;

            // Site root, e.g. http://www.sina.com
            Match match = Regex.Match(uri, @"((http(s)?://)?)+[\w-.]+[^/]");//, RegexOptions.IgnoreCase
            domainName = match.Value;

            // Deepest directory of the page, e.g. http://www.sina.com/mail/
            if (domainName.Equals(uri))
            {
                dnDir = domainName;
            }
            else
            {
                dnDir = uri.Substring(0, uri.LastIndexOf('/'));
            }
            dnDir += '/';

            // NOTE(review): the page is always decoded as UTF-8, whatever
            // charset the server declared.
            string pageData = Encoding.UTF8.GetString(e.Result);

            // Links already seen in this page, used to skip duplicates.
            List<string> urlList = new List<string>();

            // Match absolute and relative paths ending in one of the
            // configured image extensions.
            match = Regex.Match(pageData, @"((http(s)?://)?)+(((/?)+[\w-.]+(/))*)+[\w-./]+\.+(" + ImageType + ")"); //, RegexOptions.IgnoreCase
            while (match.Success)
            {
                string item = match.Value;

                // Relative link: root-relative paths get the site root,
                // everything else gets the page's directory.
                if (item.IndexOf("http://") == -1 && item.IndexOf("https://") == -1)
                {
                    item = (item[0] == '/' ? domainName : dnDir) + item;
                }

                if (!urlList.Contains(item))
                {
                    urlList.Add(item);
                    imgUrlList.Add(item);

                    // Show the link right away.
                    AddlbShowItem(item);

                    // Download while the analysis continues. A link that
                    // cannot be turned into a request must not abort the
                    // analysis of the remaining links, so the failure is
                    // handled per item, the same way AsyncDownLoad treats a
                    // failed download: drop it from the pending list and
                    // leave it unchecked in the list box.
                    try
                    {
                        WebRequest hwr = WebRequest.Create(item);
                        hwr.BeginGetResponse(new AsyncCallback(AsyncDownLoad), hwr);
                    }
                    catch (Exception)
                    {
                        imgUrlList.Remove(item);
                    }
                }

                match = match.NextMatch();
            }
        }
    }
    finally
    {
        if (waiter != null)
        {
            waiter.Set();
        }

        #region Analysis timing end
        QueryPerformanceCounter(ref count1);
        count = count1 - count;
        result = (double)(count) / (double)freq;
        toolStripStatusLabel1.Text = "分析完毕!";
        toolStripStatusLabel2.Text = string.Format(" | 分析耗时:{0}秒", result);
        Application.DoEvents();
        #endregion

        // Analysis finished (successfully or not).
        isAnalyzeComplete = true;
    }
}

这两个方法主要是用WebClient发起请求,异步获得网址所返回的数据,再对数据进行分析,提取图片链接。提取的链接主要有两种:一种是完整路径的图片链接;一种是短路径的链接,比如/images/bg.gif,程序会自动为其加上域名部分,组成完整的链接。

2. 异步下载部分代码:

/// <summary>

/// 异步接受数据

/// </summary>

/// <param name="asyncResult"></param>

public void AsyncDownLoad(IAsyncResult asyncResult)
{
    // Callback for WebRequest.BeginGetResponse: reads the image from the
    // response, saves it under savePath, removes the URL from the pending
    // list and checks it off in the list box. Any failure is treated as
    // "image not downloaded": the URL is dropped from the pending list and
    // stays unchecked in the list box.
    //
    // NOTE(review): this callback may run concurrently for several images;
    // imgUrlList and the timing fields are touched without synchronization -
    // confirm they are safe for that.

    #region Download timing start
    // Start the download stopwatch on the first callback only.
    if (cfreq == 0)
    {
        QueryPerformanceFrequency(ref cfreq);
        QueryPerformanceCounter(ref ccount);
    }
    #endregion

    // The originating request was passed as the async state object.
    WebRequest request = (WebRequest)asyncResult.AsyncState;
    string url = request.RequestUri.ToString();
    try
    {
        // Response, stream and image are all disposed even when decoding or
        // saving throws.
        using (WebResponse response = request.EndGetResponse(asyncResult))
        using (Stream stream = response.GetResponseStream())
        using (Image img = Image.FromStream(stream))
        {
            // File extension = text after the last '.' of the URL.
            string[] tmpUrl = url.Split('.');

            // Several downloads can finish within the same millisecond, so a
            // timestamp alone is not a unique file name; the GUID suffix
            // keeps parallel downloads from overwriting each other.
            string fileName = string.Concat(
                DateTime.Now.ToString("yyyyMMddHHmmssfff"),
                "_",
                Guid.NewGuid().ToString("N"),
                ".",
                tmpUrl[tmpUrl.Length - 1]);

            img.Save(string.Concat(savePath, "/", fileName));
        }

        // NOTE(review): allDone is only signalled on success; confirm that
        // nothing waits on it for failed downloads.
        allDone.Set();

        // Remove the finished image from the pending list.
        imgUrlList.Remove(url);

        // Check the entry off in the list box.
        int indexItem = this.lbShow.Items.IndexOf(url);
        if (indexItem >= 0 && indexItem < this.lbShow.Items.Count)
        {
            SetlbShowItem(indexItem);
        }
    }
    catch (Exception)
    {
        // Best effort: a failed download is dropped from the pending list.
        imgUrlList.Remove(url);
    }
}

这部分就是异步下载图片并保存的代码。调用部分请看AsyncURIAnalyze方法:分析图片链接匹配成功后就开始下载图片,每下载完一张图片,就更新界面正下方的List框(在对应链接前标记√)。

篇幅有限,还有一些其他重要的代码(如实时显示分析和下载结果的代码)请下载源代码查看。另外需要注意的是,输入待下载图片的网址时,需要输入带http的完整链接,如http://www.sina.com

程序和代码

执行文件和源码下载:IBD_exe.rar
ImagesBatchDownloading2008-8-21.rar
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: