您的位置:首页 > Web前端 > HTML

C#的一个URL加载器,能处理编码、相对地址解析、GET/POST、HTML的include、页面重定向

2016-10-01 10:12 513 查看
要让机器模拟上网,首要的问题是处理HTTP请求与响应。下面这个Url加载器功能比较完整:它支持指定响应编码、按RFC 3986解析相对URL、发送GET/POST请求、在客户端展开HTML里的<!--include-->指令,并能跟随<head>里声明的页面重定向,用起来很方便。

以下代码随便用吧,咱们已经拿这段代码申请过著作权啦。

  1

        /// <summary>
  2

        /// 最基本的Url加载函数,其它重载函数均调用它
  3

        /// </summary>
  4

        /// <param name="url"></param>
  5

        /// <param name="encoding"></param>
  6

        /// <param name="postdata"></param>
  7

        /// <param name="include">是否在客户端包含include文件</param>
  8

        /// <param name="redirectioncounter">计算重定向的次数</param>
  9

        /// <returns></returns>
 10

        public static string LoadUrl(ref UrlOperation uo, string encoding, string postdata, bool include, int redirectioncounter)
 11

        {
 12

            string str;
 13


 14

            string url=uo.Url;
 15

            HttpWebRequest request;
 16

            HttpWebResponse response;
 17


 18

            //采用HTTP GET或者POST
 19

            if (postdata == null)
 20

                postdata = "";
 21

            if (postdata.Length == 0)//HTTP GET
 22

            {
 23

                try
 24

                {
 25

                    request = (HttpWebRequest)System.Net.HttpWebRequest.Create(url);
 26

                }
 27

                catch
 28

                {
 29

                    return "";
 30

                }
 31


 32

                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)";
 33


 34

                //超时异常发生在这里
 35

                try
 36

                {
 37

                    response = (HttpWebResponse)request.GetResponse();
 38

                    //uo.Url = response.ResponseUri.ToString();
 39

                }
 40

                catch
 41

                {
 42

                    return "";
 43

                }
 44

                
 45

                System.IO.Stream stream = response.GetResponseStream();
 46


 47

                Encoding source;
 48

                try
 49

                {
 50

                    source = Encoding.GetEncoding(encoding);
 51

                }
 52

                catch
 53

                {
 54

                    source = Encoding.UTF8;
 55

                }
 56


 57

                StreamReader sr = new StreamReader(stream, source);
 58

                try
 59

                {
 60

                    str = sr.ReadToEnd();
 61

                }
 62

                catch 
 63

                {
 64

                    return "";
 65

                }
 66

                sr.Close();
 67

                stream.Close();
 68

            }
 69

            else//HTTP POST
 70

            {
 71

                try
 72

                {
 73

                    ASCIIEncoding asciiencoding = new ASCIIEncoding();
 74

                    byte[] bytes = asciiencoding.GetBytes(postdata);
 75


 76

                    request = (HttpWebRequest)System.Net.HttpWebRequest.Create(url);
 77

                    request.Method = "POST";
 78

                    request.ContentType = "application/x-www-form-urlencoded";
 79

                    request.ContentLength = postdata.Length;
 80


 81

                    Stream poststream = request.GetRequestStream();
 82

                    poststream.Write(bytes, 0, bytes.Length);
 83

                    poststream.Close();
 84


 85

                    response = (HttpWebResponse)request.GetResponse();
 86


 87

                    StreamReader sr = new StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding("GB2312"));
 88

                    str = sr.ReadToEnd();
 89

                    response.Close();
 90

                }
 91

                catch
 92

                {
 93

                    return "";
 94

                }
 95

            }
 96


 97

            uo.Url = response.ResponseUri.ToString();
 98


 99

            //在客户端包含include文件
100

            if (include)
101

            {
102

                System.Text.RegularExpressions.Regex regex = new Regex(@"<!--/W*include.*?-->", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
103

                MatchCollection mc = regex.Matches(str);
104

                if (mc.Count > 0)
105

                {
106

                    System.Text.RegularExpressions.Regex urlregex = new Regex("(?<=/").*(?=/")", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
107


108

                    string[] segments = regex.Split(str);
109


110

                    StringBuilder sb = new StringBuilder();
111

                    sb.Append(segments[0]);
112

                    for (int i = 1; i <= mc.Count; i++)
113

                    {
114

                        string s = mc[i - 1].Value;
115

                        string newurl = urlregex.Match(s).Value;
116

                        UrlOperation newuo = uo.Forward(newurl);
117

                        string included = LoadUrl(ref newuo, encoding, "", true);
118

                        sb.Append(included);
119

                        sb.Append(segments[i]);
120

                    }
121


122

                    str = sb.ToString();
123

                }
124

            }
125


126

            //页面重定向
127

            string redirection=GetRedirection(str).Trim();
128

            if (redirection.Length > 0&&redirectioncounter<5)
129

            {
130

                uo=uo.Forward(redirection);
131

                return LoadUrl(ref uo, encoding, postdata, include, redirectioncounter + 1);
132

            }
133

            else
134

                return str;
135

        }
136


 
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: