C#的一个URL加载器,能处理编码、相对地址解析、GET/POST、HTML的include、页面重定向
2016-10-01 10:12
513 查看
要让机器模拟上网,首要的问题是解决HTTP请求响应,看我们的Url加载器,功能比较强。它考虑了编码、URL的相对地址解析(见RFC),还可以POST数据,还有HTML里的<!--include-->,还有<head>里的重定向,很好用的。
以下代码随便用吧,咱们已经拿这段代码申请过著作权啦。
1
/// <summary>
/// Core URL loading routine. Downloads the page at <c>uo.Url</c> with HTTP GET
/// (when <paramref name="postdata"/> is null or empty) or HTTP POST, optionally
/// splices in the documents named by HTML include comments, and follows
/// in-page redirections reported by <c>GetRedirection</c>.
/// </summary>
/// <param name="uo">
/// URL holder. On success its <c>Url</c> is overwritten with the response URI,
/// and it is replaced by <c>uo.Forward(...)</c> when a redirection is followed.
/// </param>
/// <param name="encoding">
/// Name of the character encoding used to decode a GET response. Falls back to
/// UTF-8 when the name is null or not recognised.
/// </param>
/// <param name="postdata">Form body to POST; null or empty selects GET.</param>
/// <param name="include">Whether include comments are expanded on the client side.</param>
/// <param name="redirectioncounter">Number of redirections already followed.</param>
/// <returns>The page text, or an empty string when the download fails.</returns>
public static string LoadUrl(ref UrlOperation uo, string encoding, string postdata, bool include, int redirectioncounter)
{
    // Upper bound on followed redirections, so a redirect loop terminates.
    const int MaxRedirections = 5;

    string body;
    string finalUrl;
    bool loaded;
    if (string.IsNullOrEmpty(postdata))
    {
        loaded = TryFetchWithGet(uo.Url, encoding, out body, out finalUrl);
    }
    else
    {
        loaded = TryFetchWithPost(uo.Url, postdata, out body, out finalUrl);
    }

    if (!loaded)
    {
        return "";
    }

    // Record where the server actually answered from, so that relative
    // include and redirection targets below resolve against the final URL.
    uo.Url = finalUrl;

    if (include)
    {
        body = ExpandIncludes(uo, encoding, body);
    }

    // GetRedirection is declared elsewhere in this class; guard against a null result.
    string redirection = (GetRedirection(body) ?? "").Trim();
    if (redirection.Length > 0 && redirectioncounter < MaxRedirections)
    {
        uo = uo.Forward(redirection);
        return LoadUrl(ref uo, encoding, postdata, include, redirectioncounter + 1);
    }

    return body;
}

// Matches an HTML include comment such as <!--#include virtual="a.html"-->.
private static readonly Regex IncludeDirectiveRegex = new Regex(@"<!--\W*include.*?-->", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);

// Matches the text between the first and the last double quote of a directive.
private static readonly Regex QuotedValueRegex = new Regex("(?<=\").*(?=\")", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);

/// <summary>
/// Downloads <paramref name="url"/> with HTTP GET and decodes it with the named encoding.
/// </summary>
/// <returns>True on success; false (with empty outputs) on any failure.</returns>
private static bool TryFetchWithGet(string url, string encoding, out string body, out string finalUrl)
{
    body = "";
    finalUrl = "";

    Encoding charset;
    try
    {
        charset = Encoding.GetEncoding(encoding);
    }
    catch (System.ArgumentException)
    {
        // Null or unknown encoding name.
        charset = Encoding.UTF8;
    }
    catch (System.NotSupportedException)
    {
        charset = Encoding.UTF8;
    }

    try
    {
        HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)";

        // Timeouts surface from GetResponse().
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, charset))
        {
            body = reader.ReadToEnd();
            // Read while the response is still open; it is disposed on leaving this block.
            finalUrl = response.ResponseUri.ToString();
        }

        return true;
    }
    catch (System.Exception)
    {
        // Deliberate best effort: every failure (bad URL, timeout, I/O error)
        // is reported to the caller as an empty page.
        body = "";
        finalUrl = "";
        return false;
    }
}

/// <summary>
/// Sends <paramref name="postdata"/> to <paramref name="url"/> as an
/// application/x-www-form-urlencoded POST and reads the reply.
/// </summary>
/// <returns>True on success; false (with empty outputs) on any failure.</returns>
private static bool TryFetchWithPost(string url, string postdata, out string body, out string finalUrl)
{
    body = "";
    finalUrl = "";

    try
    {
        byte[] payload = Encoding.ASCII.GetBytes(postdata);

        HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";
        // Content-Length counts bytes on the wire, not characters in the string.
        request.ContentLength = payload.Length;

        using (Stream upload = request.GetRequestStream())
        {
            upload.Write(payload, 0, payload.Length);
        }

        // NOTE(review): the POST reply is always decoded as GB2312, whatever
        // encoding the caller asked for; kept for backward compatibility.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
        {
            body = reader.ReadToEnd();
            // Read while the response is still open; it is disposed on leaving this block.
            finalUrl = response.ResponseUri.ToString();
        }

        return true;
    }
    catch (System.Exception)
    {
        // Deliberate best effort: any failure yields an empty page.
        body = "";
        finalUrl = "";
        return false;
    }
}

/// <summary>
/// Replaces every include comment in <paramref name="html"/> with the content
/// loaded from the URL quoted inside it, resolved through <c>uo.Forward</c>.
/// </summary>
private static string ExpandIncludes(UrlOperation uo, string encoding, string html)
{
    MatchCollection directives = IncludeDirectiveRegex.Matches(html);
    if (directives.Count == 0)
    {
        return html;
    }

    StringBuilder merged = new StringBuilder(html.Length);
    int cursor = 0;
    foreach (Match directive in directives)
    {
        // Copy the text between the previous directive and this one.
        merged.Append(html, cursor, directive.Index - cursor);
        cursor = directive.Index + directive.Length;

        string target = QuotedValueRegex.Match(directive.Value).Value;
        if (target.Length == 0)
        {
            // No quoted URL in the directive: nothing to load, so drop the comment.
            continue;
        }

        UrlOperation includedUo = uo.Forward(target);
        // Uses the 4-argument LoadUrl overload, which is declared elsewhere in this class.
        merged.Append(LoadUrl(ref includedUo, encoding, "", true));
    }

    merged.Append(html, cursor, html.Length - cursor);
    return merged.ToString();
}
136
以下代码随便用吧,咱们已经拿这段代码申请过著作权啦。
1
/// <summary>
/// Core URL loading routine. Downloads the page at <c>uo.Url</c> with HTTP GET
/// (when <paramref name="postdata"/> is null or empty) or HTTP POST, optionally
/// splices in the documents named by HTML include comments, and follows
/// in-page redirections reported by <c>GetRedirection</c>.
/// </summary>
/// <param name="uo">
/// URL holder. On success its <c>Url</c> is overwritten with the response URI,
/// and it is replaced by <c>uo.Forward(...)</c> when a redirection is followed.
/// </param>
/// <param name="encoding">
/// Name of the character encoding used to decode a GET response. Falls back to
/// UTF-8 when the name is null or not recognised.
/// </param>
/// <param name="postdata">Form body to POST; null or empty selects GET.</param>
/// <param name="include">Whether include comments are expanded on the client side.</param>
/// <param name="redirectioncounter">Number of redirections already followed.</param>
/// <returns>The page text, or an empty string when the download fails.</returns>
public static string LoadUrl(ref UrlOperation uo, string encoding, string postdata, bool include, int redirectioncounter)
{
    // Upper bound on followed redirections, so a redirect loop terminates.
    const int MaxRedirections = 5;

    string body;
    string finalUrl;
    bool loaded;
    if (string.IsNullOrEmpty(postdata))
    {
        loaded = TryFetchWithGet(uo.Url, encoding, out body, out finalUrl);
    }
    else
    {
        loaded = TryFetchWithPost(uo.Url, postdata, out body, out finalUrl);
    }

    if (!loaded)
    {
        return "";
    }

    // Record where the server actually answered from, so that relative
    // include and redirection targets below resolve against the final URL.
    uo.Url = finalUrl;

    if (include)
    {
        body = ExpandIncludes(uo, encoding, body);
    }

    // GetRedirection is declared elsewhere in this class; guard against a null result.
    string redirection = (GetRedirection(body) ?? "").Trim();
    if (redirection.Length > 0 && redirectioncounter < MaxRedirections)
    {
        uo = uo.Forward(redirection);
        return LoadUrl(ref uo, encoding, postdata, include, redirectioncounter + 1);
    }

    return body;
}

// Matches an HTML include comment such as <!--#include virtual="a.html"-->.
private static readonly Regex IncludeDirectiveRegex = new Regex(@"<!--\W*include.*?-->", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);

// Matches the text between the first and the last double quote of a directive.
private static readonly Regex QuotedValueRegex = new Regex("(?<=\").*(?=\")", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);

/// <summary>
/// Downloads <paramref name="url"/> with HTTP GET and decodes it with the named encoding.
/// </summary>
/// <returns>True on success; false (with empty outputs) on any failure.</returns>
private static bool TryFetchWithGet(string url, string encoding, out string body, out string finalUrl)
{
    body = "";
    finalUrl = "";

    Encoding charset;
    try
    {
        charset = Encoding.GetEncoding(encoding);
    }
    catch (System.ArgumentException)
    {
        // Null or unknown encoding name.
        charset = Encoding.UTF8;
    }
    catch (System.NotSupportedException)
    {
        charset = Encoding.UTF8;
    }

    try
    {
        HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)";

        // Timeouts surface from GetResponse().
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, charset))
        {
            body = reader.ReadToEnd();
            // Read while the response is still open; it is disposed on leaving this block.
            finalUrl = response.ResponseUri.ToString();
        }

        return true;
    }
    catch (System.Exception)
    {
        // Deliberate best effort: every failure (bad URL, timeout, I/O error)
        // is reported to the caller as an empty page.
        body = "";
        finalUrl = "";
        return false;
    }
}

/// <summary>
/// Sends <paramref name="postdata"/> to <paramref name="url"/> as an
/// application/x-www-form-urlencoded POST and reads the reply.
/// </summary>
/// <returns>True on success; false (with empty outputs) on any failure.</returns>
private static bool TryFetchWithPost(string url, string postdata, out string body, out string finalUrl)
{
    body = "";
    finalUrl = "";

    try
    {
        byte[] payload = Encoding.ASCII.GetBytes(postdata);

        HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";
        // Content-Length counts bytes on the wire, not characters in the string.
        request.ContentLength = payload.Length;

        using (Stream upload = request.GetRequestStream())
        {
            upload.Write(payload, 0, payload.Length);
        }

        // NOTE(review): the POST reply is always decoded as GB2312, whatever
        // encoding the caller asked for; kept for backward compatibility.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
        {
            body = reader.ReadToEnd();
            // Read while the response is still open; it is disposed on leaving this block.
            finalUrl = response.ResponseUri.ToString();
        }

        return true;
    }
    catch (System.Exception)
    {
        // Deliberate best effort: any failure yields an empty page.
        body = "";
        finalUrl = "";
        return false;
    }
}

/// <summary>
/// Replaces every include comment in <paramref name="html"/> with the content
/// loaded from the URL quoted inside it, resolved through <c>uo.Forward</c>.
/// </summary>
private static string ExpandIncludes(UrlOperation uo, string encoding, string html)
{
    MatchCollection directives = IncludeDirectiveRegex.Matches(html);
    if (directives.Count == 0)
    {
        return html;
    }

    StringBuilder merged = new StringBuilder(html.Length);
    int cursor = 0;
    foreach (Match directive in directives)
    {
        // Copy the text between the previous directive and this one.
        merged.Append(html, cursor, directive.Index - cursor);
        cursor = directive.Index + directive.Length;

        string target = QuotedValueRegex.Match(directive.Value).Value;
        if (target.Length == 0)
        {
            // No quoted URL in the directive: nothing to load, so drop the comment.
            continue;
        }

        UrlOperation includedUo = uo.Forward(target);
        // Uses the 4-argument LoadUrl overload, which is declared elsewhere in this class.
        merged.Append(LoadUrl(ref includedUo, encoding, "", true));
    }

    merged.Append(html, cursor, html.Length - cursor);
    return merged.ToString();
}
136
相关文章推荐
- Jsoup解析html时对相对地址的处理
- j2ee的web项目,在浏览器中发起一个该项目中html页面的绝对地址,也是发起的一个http url请求,请求的响应报文的结果就是该html页面的所有html代码
- 在C#.net中做页面上传的程序。用Dhtml的控件:(创建文件上载控件,该控件带有一个文本框和一个浏览按钮。)和类HtmlInputFile的两种方法
- php 访问一个网站地址,发送post数据和设置cookie,取得服务器返回的数据(html,setcookie命令设置的cookie);
- C#正则表达式提取HTML中IMG标签的URL地址 .
- 读取html页面文件解析邮箱地址
- Asp.net url重写后页面回发地址改变的处理
- 简单的解析文件,取URL地址,并根据地址抓下页面
- Url与Html的编码与解码(C#)
- C# 获取上一个页面的URL
- c#通过GET/POST获取页面的代码
- 通过hidden隐藏域和URL参数(在一个处理页面实现不同的处理内容)
- Url地址重写,利用HttpHander手工编译页面并按需生成静态HTML文件
- C#获取指定URL页面的HTML源码
- C# 获取上一个页面/当前页面的URL
- 一个URL路径编码(C#版)
- .net调用别人的http页面,返回结果,url中有中文的处理,get和post两种方法
- JavaScript 解析html页面中的URL字符串
- C#获取页面URL地址
- html页面加载一个jsp页面或者html页面 用<!--#include virtual=""--> 这样的标签