网络蜘蛛核心代码c#版
2013-11-19 23:27
267 查看
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.IO.Compression;
using System.Xml;
using System.Web;
using System.Collections;
using System.Runtime.InteropServices;
using System.Net;
using System.Net.Security;
using System.Security.Authentication;
using System.Security.Cryptography.X509Certificates;

namespace Common
{
    /// <summary>
    /// Callback invoked with the fully configured request just before it is sent.
    /// </summary>
    /// <param name="httpRequest">The request that is about to be sent.</param>
    public delegate void OnGetPostReady(HttpWebRequest httpRequest);

    /// <summary>
    /// Callback intended to be invoked once a response has been received.
    /// </summary>
    /// <param name="httpRequest">The request whose response was received.</param>
    public delegate void OnGetResponseReady(HttpWebRequest httpRequest);

    /// <summary>
    /// Thin wrapper around <see cref="HttpWebRequest"/> that keeps a shared cookie container,
    /// optional proxy / credentials / client certificate, and returns response bodies either
    /// as a <see cref="MemoryStream"/> or as decoded text.
    /// </summary>
    /// <remarks>
    /// Request failures are not thrown to the caller: they are logged through
    /// <c>BaseDebug.DebugPrint</c>, <see cref="LastAccessError"/> is set, and an empty (or
    /// partially filled) stream is returned. The class keeps per-request state in fields and
    /// contains no synchronization.
    /// </remarks>
    public class HttpWebHelper
    {
        /// <summary>The most recently created request.</summary>
        protected HttpWebRequest httpRequest;

        /// <summary>The most recently received response (already closed once a call returns).</summary>
        protected HttpWebResponse httpResponse;

        /// <summary>Cookie container attached to every request created by this instance.</summary>
        protected CookieContainer cookieContainer;

        /// <summary>Explicit credentials; when null, default credentials are used in certificated mode.</summary>
        protected CredentialCache credentialCache;

        /// <summary>When true, https requests get credentials, an accept-all certificate callback and the optional client certificate.</summary>
        protected bool certificatedMode = false;

        /// <summary>Path of the client certificate file added to https requests in certificated mode.</summary>
        protected string certFilepath = string.Empty;

        /// <summary>Invoked after the request has been configured and before it is sent.</summary>
        public OnGetPostReady OnGetPostReadyHandler = null;

        /// <summary>Invoked after the response headers have been received and before the body is read.</summary>
        /// <remarks>
        /// NOTE(review): declared as <see cref="OnGetPostReady"/> although an
        /// <see cref="OnGetResponseReady"/> delegate exists. The type is kept because changing
        /// it would break callers that construct the delegate explicitly.
        /// </remarks>
        public OnGetPostReady OnGetResponseReadyHandler = null;

        /// <summary>Chunk size, in bytes, used when copying the response body.</summary>
        protected readonly int DEFAULT_BUFFER_SIZE = 4096;

        /// <summary>Optional proxy applied to every request.</summary>
        public WebProxy webProxySrv = null;

        private static readonly int MyConnectionLimit = 300;

        /// <summary>Caller-controlled flag; only read by <see cref="AbortHttpRequest"/>.</summary>
        public bool CheckGotoRecv { get; set; }

        /// <summary>Set to true once a response has been received and body reading is about to start.</summary>
        public bool DoBetIsGotoRecv { get; set; }

        /// <summary>
        /// True once any request or callback has failed. NOTE(review): this class never resets
        /// the flag to false, so it is sticky for the lifetime of the instance.
        /// </summary>
        public bool LastAccessError { private set; get; }

        /// <summary>
        /// URL of the last request after automatic redirection.
        /// </summary>
        public string CurrentUrl { private set; get; }

        /// <summary>Value of the <c>Location</c> header of the last response, or null if absent.</summary>
        public string CurrentLocation { private set; get; }

        /// <summary>Raw <c>Set-Cookie</c> header of the last response that carried one.</summary>
        public string CurSetCookie { set; get; }

        /// <summary>Caller-owned slot; this class never writes to it.</summary>
        public string CurSetCookie2 { set; get; }

        /// <summary>
        /// Default constructor. Creates an empty cookie container and adjusts the process-wide
        /// <see cref="ServicePointManager"/> settings (connection limit, Expect100Continue,
        /// idle time).
        /// </summary>
        public HttpWebHelper()
        {
            this.cookieContainer = new CookieContainer();
            ServicePointManager.DefaultConnectionLimit = MyConnectionLimit;
            ServicePointManager.Expect100Continue = false;
            ServicePointManager.MaxServicePointIdleTime = 10000;
        }

        /// <summary>
        /// Constructor taking a proxy.
        /// </summary>
        /// <param name="wp">Proxy applied to every request; may be null.</param>
        public HttpWebHelper(WebProxy wp)
            : this()
        {
            this.webProxySrv = wp;
        }

        /// <summary>
        /// Constructor for requests that need authentication.
        /// </summary>
        /// <param name="cred">True to enable certificated mode for https URLs.</param>
        public HttpWebHelper(bool cred)
            : this()
        {
            this.certificatedMode = cred;
        }

        /// <summary>
        /// Constructor for authenticated requests sent through a proxy.
        /// </summary>
        /// <param name="cred">True to enable certificated mode for https URLs.</param>
        /// <param name="wp">Proxy applied to every request; may be null.</param>
        public HttpWebHelper(bool cred, WebProxy wp)
            : this()
        {
            this.certificatedMode = cred;
            this.webProxySrv = wp;
        }

        /// <summary>
        /// Constructor for authenticated requests that present a client certificate.
        /// </summary>
        /// <param name="cred">True to enable certificated mode for https URLs.</param>
        /// <param name="certFilepath">Path of the client certificate file.</param>
        public HttpWebHelper(bool cred, string certFilepath)
            : this(cred)
        {
            this.certFilepath = certFilepath;
        }

        /// <summary>
        /// Constructor for authenticated requests with a proxy and a client certificate.
        /// </summary>
        /// <param name="cred">True to enable certificated mode for https URLs.</param>
        /// <param name="wp">Proxy applied to every request; may be null.</param>
        /// <param name="certFilepath">Path of the client certificate file.</param>
        public HttpWebHelper(bool cred, WebProxy wp, string certFilepath)
            : this(cred, wp)
        {
            this.certFilepath = certFilepath;
        }

        /// <summary>
        /// Constructor that registers a user name and password for a URI and enables
        /// certificated mode.
        /// </summary>
        /// <param name="uri">URI prefix the credential applies to.</param>
        /// <param name="method">Authentication type passed to <see cref="CredentialCache.Add(Uri, string, NetworkCredential)"/>.</param>
        /// <param name="username">User name.</param>
        /// <param name="password">Password.</param>
        public HttpWebHelper(string uri, string method, string username, string password)
            : this(true)
        {
            this.credentialCache = new CredentialCache();
            this.credentialCache.Add(new Uri(uri), method, new NetworkCredential(username, password));
        }

        /// <summary>
        /// Server certificate validation callback that accepts every certificate.
        /// </summary>
        /// <remarks>
        /// SECURITY: returning true unconditionally disables TLS server validation. It is kept
        /// for backward compatibility; see the note in the request setup where it is installed.
        /// </remarks>
        /// <param name="sender">Unused.</param>
        /// <param name="certificate">Unused.</param>
        /// <param name="chain">Unused.</param>
        /// <param name="errors">Unused.</param>
        /// <returns>Always true.</returns>
        public bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
        {
            return true;
        }

        /// <summary>
        /// Configures a new request and then sets its <c>Accept</c> header.
        /// </summary>
        private void SetHttpRequestOptions_Accept(string url, string method, CookieCollection cc, string referUrl, bool nocache, DecompressionMethods dm, string httpAccept)
        {
            this.SetHttpRequestOptions(url, method, cc, referUrl, nocache, dm);
            // Applied after the pre-send callback has run, so it overrides any Accept set there.
            this.httpRequest.Accept = httpAccept;
        }

        /// <summary>
        /// Creates and configures the <see cref="HttpWebRequest"/> stored in <c>httpRequest</c>.
        /// </summary>
        /// <param name="url">Target URL.</param>
        /// <param name="method">HTTP method.</param>
        /// <param name="cc">Extra cookies added to the shared container; may be null.</param>
        /// <param name="referUrl">Referer header value; skipped when null or empty.</param>
        /// <param name="nocache">True to send <c>Cache-Control: no-cache</c>.</param>
        /// <param name="dm">Automatic decompression methods.</param>
        private void SetHttpRequestOptions(string url, string method, CookieCollection cc, string referUrl, bool nocache, DecompressionMethods dm)
        {
            httpRequest = (HttpWebRequest)HttpWebRequest.Create(url);
            httpRequest.UnsafeAuthenticatedConnectionSharing = true;
            httpRequest.ServicePoint.ConnectionLimit = MyConnectionLimit;

            if (null != this.webProxySrv)
            {
                httpRequest.Proxy = this.webProxySrv;
            }

            // Ordinal, case-insensitive prefix test: no culture dependence and no failure on short URLs.
            if (this.certificatedMode && url.StartsWith("https", StringComparison.OrdinalIgnoreCase))
            {
                // SECURITY: this callback is process-wide and accepts every server certificate,
                // so it affects all HTTPS traffic in the process, not only this request.
                ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);

                if (null == this.credentialCache)
                {
                    httpRequest.UseDefaultCredentials = true;
                }
                else
                {
                    httpRequest.Credentials = this.credentialCache;
                }

                if (!string.IsNullOrEmpty(this.certFilepath))
                {
                    httpRequest.ClientCertificates.Add(X509Certificate.CreateFromCertFile(this.certFilepath));
                }
            }

            httpRequest.CookieContainer = this.cookieContainer;
            if (!string.IsNullOrEmpty(referUrl))
            {
                httpRequest.Referer = referUrl;
            }

            httpRequest.AutomaticDecompression = dm;
            httpRequest.ServicePoint.Expect100Continue = false;
            httpRequest.ServicePoint.UseNagleAlgorithm = false;
            httpRequest.ContentType = "application/x-www-form-urlencoded";
            httpRequest.Method = method;
            // Timeout comes from the project-level configuration class.
            httpRequest.Timeout = ApplicationConfig.HTTP_REQUEST_TIMEOUT;
            httpRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
            httpRequest.Headers.Add("Accept-Language", "zh-cn");
            httpRequest.Headers.Add("UA-CPU", "x86");

            if (nocache)
            {
                httpRequest.Headers.Add("Cache-Control", "no-cache");
            }

            if (null != cc)
            {
                httpRequest.CookieContainer.Add(cc);
            }

            // Give the caller a chance to tweak the request before it is sent. A failing
            // callback is logged and flagged, but does not stop the request.
            if (null != this.OnGetPostReadyHandler)
            {
                try
                {
                    this.OnGetPostReadyHandler(this.httpRequest);
                }
                catch (System.Exception ex)
                {
                    this.LastAccessError = true;
                    BaseDebug.DebugPrint(ex.ToString());
                }
            }
        }

        /// <summary>
        /// Configures a new request with caching allowed, gzip/deflate decompression and the
        /// given <c>Accept</c> header.
        /// </summary>
        private void SetHttpRequestOptions(string url, string method, CookieCollection cc, string referUrl, string httpAccept)
        {
            this.SetHttpRequestOptions_Accept(url, method, cc, referUrl, false, DecompressionMethods.GZip | DecompressionMethods.Deflate, httpAccept);
        }

        /// <summary>
        /// Refreshes the members derived from the last request/response pair.
        /// </summary>
        private void ManualResetMember()
        {
            this.cookieContainer = httpRequest.CookieContainer;
            this.CurrentUrl = httpRequest.Address.OriginalString;
            this.CurrentLocation = httpResponse.Headers["Location"];
        }

        /// <summary>
        /// Sends the current request, copies the response body into <paramref name="target"/>
        /// and rewinds it. The response is closed even when reading fails.
        /// </summary>
        /// <param name="target">Stream receiving the response body.</param>
        private void ReadResponseInto(MemoryStream target)
        {
            HttpWebResponse response = (HttpWebResponse)httpRequest.GetResponse();
            this.httpResponse = response;
            try
            {
                // Was a response actually received?
                if (!this.httpRequest.HaveResponse)
                {
                    if (null != response)
                    {
                        response.Close();
                    }
                    this.httpRequest.Abort();
                    return;
                }

                this.ManualResetMember();

                if (null != this.OnGetResponseReadyHandler)
                {
                    try
                    {
                        this.OnGetResponseReadyHandler(this.httpRequest);
                    }
                    catch (System.Exception ex)
                    {
                        this.LastAccessError = true;
                        BaseDebug.DebugPrint(ex.ToString());
                    }
                }

                this.DoBetIsGotoRecv = true;

                using (Stream responseStream = response.GetResponseStream())
                {
                    if (null != responseStream && responseStream.CanRead)
                    {
                        byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
                        int read;
                        while ((read = responseStream.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            target.Write(buffer, 0, read);
                        }
                    }
                }

                string setCookie = response.Headers["Set-Cookie"];
                if (setCookie != null)
                {
                    this.CurSetCookie = setCookie;
                }

                // Important: rewind so that callers read from the start.
                target.Seek(0, SeekOrigin.Begin);
            }
            finally
            {
                // Release the connection on every path, including a failure while reading.
                if (null != response)
                {
                    response.Close();
                }
            }
        }

        /// <summary>
        /// Records and logs a failed request and aborts it.
        /// </summary>
        /// <param name="url">URL that was being requested.</param>
        /// <param name="ex">The exception that was caught.</param>
        private void HandleRequestFailure(string url, System.Exception ex)
        {
            this.LastAccessError = true;
            BaseDebug.DebugPrint("異常網址:" + url);
            BaseDebug.DebugPrint(ex.ToString());
            if (null != httpRequest)
            {
                httpRequest.Abort();
            }
        }

        /// <summary>
        /// Sends a body-less request and returns the response body.
        /// </summary>
        /// <param name="url">Target URL.</param>
        /// <param name="method">HTTP method.</param>
        /// <param name="cc">Extra cookies; may be null.</param>
        /// <param name="referUrl">Referer header value; may be null.</param>
        /// <param name="httpAccept">Accept header value.</param>
        /// <returns>
        /// A stream positioned at the start of the body. On failure the stream is empty or
        /// holds whatever was read before the error, and <see cref="LastAccessError"/> is true.
        /// The caller owns the stream.
        /// </returns>
        public MemoryStream GetMemoryStream(string url, string method, CookieCollection cc, string referUrl, string httpAccept)
        {
            MemoryStream ms = new MemoryStream();
            try
            {
                this.SetHttpRequestOptions(url, method, cc, referUrl, httpAccept);
                this.ReadResponseInto(ms);
            }
            catch (System.Exception ex)
            {
                this.HandleRequestFailure(url, ex);
            }
            return ms;
        }

        /// <summary>
        /// Sends a body-less request with <c>Accept: text/html</c> and returns the response body.
        /// </summary>
        public MemoryStream SimpleGetMemoryStream(string url, string method)
        {
            return this.GetMemoryStream(url, method, null, null, "text/html");
        }

        /// <summary>
        /// Sends a body-less request with the given Accept header and returns the response body.
        /// </summary>
        public MemoryStream SimpleGetMemoryStream(string url, string method, string httpAccept)
        {
            return this.GetMemoryStream(url, method, null, null, httpAccept);
        }

        /// <summary>
        /// Sends a body-less request and returns the whole response as text.
        /// </summary>
        /// <param name="url">Target URL.</param>
        /// <param name="method">HTTP method.</param>
        /// <param name="coding">Encoding used to decode the response; null uses the <see cref="StreamReader"/> default.</param>
        /// <param name="cc">Extra cookies; may be null.</param>
        /// <param name="referUrl">Referer header value; may be null.</param>
        /// <returns>The decoded response text, or an empty string when the request failed.</returns>
        public string SimpleDoPostWrapper(string url, string method, Encoding coding, CookieCollection cc, string referUrl)
        {
            using (MemoryStream body = this.GetMemoryStream(url, method, cc, referUrl, "text/html"))
            using (StreamReader reader = (null == coding) ? new StreamReader(body) : new StreamReader(body, coding))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>Sends a body-less request and returns the response text (default decoding).</summary>
        public string SimpleDoPostWrapper(string url, string method)
        {
            return this.SimpleDoPostWrapper(url, method, null, null, null);
        }

        /// <summary>Sends a body-less request with extra cookies and returns the response text.</summary>
        public string SimpleDoPostWrapper(string url, string method, CookieCollection cc)
        {
            return this.SimpleDoPostWrapper(url, method, null, cc, null);
        }

        /// <summary>Sends a body-less request with a Referer and returns the response text.</summary>
        public string SimpleDoPostWrapper(string url, string method, string referUrl)
        {
            return this.SimpleDoPostWrapper(url, method, null, null, referUrl);
        }

        /// <summary>
        /// Uploads <paramref name="data"/> as the request body and returns the response body.
        /// </summary>
        /// <param name="url">Target URL.</param>
        /// <param name="data">Request body text; null is sent as an empty body.</param>
        /// <param name="method">HTTP method (must allow a request body).</param>
        /// <param name="coding">Encoding of the request body; null falls back to UTF-8.</param>
        /// <param name="cc">Extra cookies; may be null.</param>
        /// <param name="referUrl">Referer header value; may be null.</param>
        /// <returns>
        /// A stream positioned at the start of the body. On failure the stream is empty or
        /// holds whatever was read before the error, and <see cref="LastAccessError"/> is true.
        /// The caller owns the stream.
        /// </returns>
        public MemoryStream GetMemoryStream(string url, string data, string method, Encoding coding, CookieCollection cc, string referUrl)
        {
            MemoryStream ms = new MemoryStream();
            try
            {
                this.SetHttpRequestOptions(url, method, cc, referUrl, "text/html");

                byte[] bytesData = (coding ?? Encoding.UTF8).GetBytes(data ?? string.Empty);
                using (Stream requestStream = httpRequest.GetRequestStream())
                {
                    requestStream.Write(bytesData, 0, bytesData.Length);
                    requestStream.Flush();
                }

                this.ReadResponseInto(ms);
            }
            catch (System.Exception ex)
            {
                this.HandleRequestFailure(url, ex);
            }
            return ms;
        }

        /// <summary>Uploads data without extra cookies or Referer and returns the response body.</summary>
        public MemoryStream SimpleGetMemoryStream(string url, string data, string method, Encoding coding)
        {
            return this.GetMemoryStream(url, data, method, coding, null, null);
        }

        /// <summary>Uploads data with a Referer and returns the response body.</summary>
        public MemoryStream SimpleGetMemoryStream(string url, string data, string method, Encoding coding, string referUrl)
        {
            return this.GetMemoryStream(url, data, method, coding, null, referUrl);
        }

        /// <summary>
        /// Uploads <paramref name="data"/> and returns the whole response as text.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <paramref name="coding"/> is applied to the request body only; the
        /// response is decoded with the <see cref="StreamReader"/> default, unlike
        /// <c>SimpleDoPostWrapper</c>. Left unchanged because callers may rely on it - confirm.
        /// </remarks>
        /// <param name="url">Target URL.</param>
        /// <param name="data">Request body text.</param>
        /// <param name="method">HTTP method.</param>
        /// <param name="coding">Encoding of the request body.</param>
        /// <param name="cc">Extra cookies; may be null.</param>
        /// <param name="referUrl">Referer header value; may be null.</param>
        /// <returns>The response text, or an empty string when the request failed.</returns>
        public string DoPostWrapper(string url, string data, string method, Encoding coding, CookieCollection cc, string referUrl)
        {
            using (MemoryStream body = this.GetMemoryStream(url, data, method, coding, cc, referUrl))
            using (StreamReader reader = new StreamReader(body))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>Uploads data and returns the response text.</summary>
        public string DoPostWrapper(string url, string data, string method, Encoding coding)
        {
            return this.DoPostWrapper(url, data, method, coding, null, null);
        }

        /// <summary>Uploads data with extra cookies and returns the response text.</summary>
        public string DoPostWrapper(string url, string data, string method, Encoding coding, CookieCollection cc)
        {
            return this.DoPostWrapper(url, data, method, coding, cc, null);
        }

        /// <summary>Uploads data with a Referer and returns the response text.</summary>
        public string DoPostWrapper(string url, string data, string method, Encoding coding, string referUrl)
        {
            return this.DoPostWrapper(url, data, method, coding, null, referUrl);
        }

        /// <summary>
        /// Uploads form arguments given as a dictionary and returns the whole response as text.
        /// </summary>
        /// <param name="url">Target URL.</param>
        /// <param name="dicArguments">Form fields; URL-encoded with the default encoding.</param>
        /// <param name="method">HTTP method.</param>
        /// <param name="coding">Encoding of the request body.</param>
        /// <param name="cc">Extra cookies; may be null.</param>
        /// <param name="referUrl">Referer header value; may be null.</param>
        /// <returns>The response text, or an empty string when the request failed.</returns>
        public string DoPostWrapper(string url, Dictionary<string, string> dicArguments, string method, Encoding coding, CookieCollection cc, string referUrl)
        {
            string data = this.BuildRequestArguments(dicArguments);
            return this.DoPostWrapper(url, data, method, coding, cc, referUrl);
        }

        /// <summary>Uploads form arguments and returns the response text.</summary>
        public string DoPostWrapper(string url, Dictionary<string, string> dicArguments, string method, Encoding coding)
        {
            return this.DoPostWrapper(url, dicArguments, method, coding, null, null);
        }

        /// <summary>Uploads form arguments with extra cookies and returns the response text.</summary>
        public string DoPostWrapper(string url, Dictionary<string, string> dicArguments, string method, Encoding coding, CookieCollection cc)
        {
            return this.DoPostWrapper(url, dicArguments, method, coding, cc, null);
        }

        /// <summary>Uploads form arguments with a Referer and returns the response text.</summary>
        public string DoPostWrapper(string url, Dictionary<string, string> dicArguments, string method, Encoding coding, string referUrl)
        {
            return this.DoPostWrapper(url, dicArguments, method, coding, null, referUrl);
        }

        /// <summary>
        /// Downloads a resource (e.g. a captcha image) with <c>Accept: */*</c>. Only the memory
        /// stream is returned; the caller is responsible for closing it.
        /// </summary>
        /// <param name="url">Target URL.</param>
        /// <param name="method">HTTP method.</param>
        /// <returns>The response body as a stream owned by the caller.</returns>
        public MemoryStream DownloadStream(string url, string method)
        {
            return this.SimpleGetMemoryStream(url, method, "*/*");
        }

        /// <summary>
        /// Builds a URL-encoded <c>key=value&amp;key=value</c> string from a dictionary, with an
        /// optional custom encoding.
        /// </summary>
        /// <param name="dicArguments">Form fields; null or empty yields an empty string.</param>
        /// <param name="coding">Encoding used for URL-encoding; null uses the <see cref="HttpUtility"/> default.</param>
        /// <returns>The encoded argument string without a trailing separator.</returns>
        public string BuildRequestArguments(Dictionary<string, string> dicArguments, Encoding coding)
        {
            if (null == dicArguments || 0 == dicArguments.Count)
            {
                return string.Empty;
            }

            StringBuilder sb = new StringBuilder();
            bool first = true;
            foreach (KeyValuePair<string, string> kvp in dicArguments)
            {
                // Separator goes between pairs, so nothing has to be trimmed afterwards.
                if (!first)
                {
                    sb.Append("&");
                }
                first = false;

                if (null != coding)
                {
                    sb.Append(HttpUtility.UrlEncode(kvp.Key, coding));
                    sb.Append("=");
                    sb.Append(HttpUtility.UrlEncode(kvp.Value, coding));
                }
                else
                {
                    sb.Append(HttpUtility.UrlEncode(kvp.Key));
                    sb.Append("=");
                    sb.Append(HttpUtility.UrlEncode(kvp.Value));
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Builds a URL-encoded argument string from a dictionary using the default encoding.
        /// </summary>
        /// <param name="dicArguments">Form fields; null or empty yields an empty string.</param>
        /// <returns>The encoded argument string.</returns>
        public string BuildRequestArguments(Dictionary<string, string> dicArguments)
        {
            return this.BuildRequestArguments(dicArguments, null);
        }

        /// <summary>
        /// Looks up the value of a cookie stored for the given URI.
        /// </summary>
        /// <param name="key">Cookie name.</param>
        /// <param name="domain">Absolute URI whose cookies are searched.</param>
        /// <returns>The cookie value, or an empty string when no such cookie is stored.</returns>
        public string GetCookieValue(string key, string domain)
        {
            if (null == this.cookieContainer || 0 == this.cookieContainer.Count)
            {
                return string.Empty;
            }

            CookieCollection cc = this.cookieContainer.GetCookies(new Uri(domain));
            // The name indexer yields null for an unknown cookie; previously this dereference threw.
            Cookie cookie = cc[key];
            return (null == cookie) ? string.Empty : cookie.Value;
        }

        /// <summary>
        /// Replaces the cookie container used for subsequent requests.
        /// </summary>
        /// <param name="cc">The new cookie container.</param>
        public void SetCookieContainer(CookieContainer cc)
        {
            this.cookieContainer = cc;
        }

        /// <summary>
        /// Aborts the current request, if any.
        /// </summary>
        /// <returns>True when both <see cref="CheckGotoRecv"/> and <see cref="DoBetIsGotoRecv"/> are set.</returns>
        public bool AbortHttpRequest()
        {
            if (null != this.httpRequest)
            {
                this.httpRequest.Abort();
            }
            return this.CheckGotoRecv && this.DoBetIsGotoRecv;
        }
    }
}
相关文章推荐
- C#和Java 网络编程实例核心代码
- 基于visual c++之windows核心编程代码分析(47)实现交换网络的QQ号嗅探
- 基于visual c++之windows核心编程代码分析(47)实现交换网络的QQ号嗅探
- Android_网络请求数据之HttpClient方式(核心代码)
- 基于visual c++之windows核心编程代码分析(47)实现交换网络的QQ号嗅探
- android平台基于sip协议的网络电话实现(知识点及核心代码)
- Libgdx: android单机斗地主支持局域网wifi联网的网络模块核心代码
- 神经网络入门 Python 十行核心代码
- HTTP的网络访问核心代码
- 搜索引擎/网络蜘蛛程序代码
- 本人第一个开源代码,NETSpider 网络蜘蛛采集工具
- Libgdx: android单机斗地主支持局域网wifi联网的网络模块核心代码
- 网络蜘蛛(网络爬虫)核心C#源代码
- 传智播客c/c++公开课学习笔记--Linux网络流媒体服务器的核心代码揭秘
- MFC socket网络通讯核心代码
- 传智播客c/c++公开课学习笔记--Linux网络流媒体服务器的核心代码揭秘
- http上传文件到网络核心代码
- unix/linux的网络核心代码关系
- Android网络传输框架的核心代码量对比
- Android_网络请求数据之HttpConnection(核心代码)