您的位置:首页 > Web前端 > HTML

获取页面Html代码,自动识别编码

2009-06-11 16:29 543 查看


/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body as text,
/// decoded with the charset reported by <c>DecodeData</c> when one is available.
/// </summary>
/// <param name="url">Absolute URL of the page to fetch.</param>
/// <returns>The complete response body as a string.</returns>
/// <remarks>
/// The URL is requested twice: once inside <c>DecodeData</c> to detect the charset,
/// and once here to read the content. When no usable charset is detected,
/// <see cref="Encoding.Default"/> is used.
/// </remarks>
public string GetHtml(string url)
{
    string code = DecodeData(url);

    // Resolve the encoding before opening the second connection so that a bad
    // charset name can never leave a response undisposed.
    Encoding encoding = Encoding.Default;
    if (!string.IsNullOrEmpty(code))
    {
        // Tolerate values such as "utf-8"; q=1 or 'gbk': keep only the bare name.
        string name = code;
        int semicolon = name.IndexOf(';');
        if (semicolon >= 0)
        {
            name = name.Substring(0, semicolon);
        }
        name = name.Trim().Trim('"', '\'');

        if (name.Length > 0)
        {
            try
            {
                // GetEncoding is case-insensitive, so no culture-sensitive ToUpper() is needed.
                encoding = Encoding.GetEncoding(name);
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name: keep the default encoding
                // instead of failing the whole download.
            }
        }
    }

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Timeout = 30000; // milliseconds
    request.Headers.Set("Pragma", "no-cache");

    // using blocks release the connection even if reading throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream streamReceive = response.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
    {
        return streamReader.ReadToEnd();
    }
}


// Adapted, with minor changes, from http://blog.sunmast.com/natas/archive/2004/10/30/989.aspx




/// <summary>
/// Requests <paramref name="Url"/> and tries to determine the charset of the response.
/// </summary>
/// <param name="Url">Absolute URL of the page to inspect.</param>
/// <returns>
/// The charset name found in the Content-Type response header or, failing that,
/// after the first "charset=" occurrence in the response body; an empty string
/// when neither location declares one.
/// </returns>
private string DecodeData(string Url)
{
    WebRequest r = WebRequest.Create(Url);

    // Dispose the response so the underlying connection is released on every path.
    using (WebResponse w = r.GetResponse())
    {
        // First see whether the Content-Type header carries "charset=value".
        string charset = ExtractCharsetName(w.Headers["content-type"]);
        if (charset.Length > 0)
        {
            // The header is authoritative; no need to download the body.
            return charset;
        }

        // The header was missing or had no charset: buffer the body and look for
        // a declaration such as <meta ... charset=xxx"> or <meta charset="xxx">.
        using (MemoryStream rawdata = new MemoryStream())
        using (Stream rs = w.GetResponseStream())
        {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = rs.Read(buffer, 0, buffer.Length)) > 0)
            {
                rawdata.Write(buffer, 0, read);
            }

            // The bytes are decoded as ASCII only to locate the declaration; the
            // real encoding is not known yet at this point.
            string meta = Encoding.ASCII.GetString(rawdata.ToArray());
            return ExtractCharsetName(meta);
        }
    }
}

/// <summary>
/// Returns the token that follows the first "charset=" (matched case-insensitively)
/// in <paramref name="text"/>, without surrounding quotes, or an empty string when
/// there is none.
/// </summary>
private static string ExtractCharsetName(string text)
{
    const string Marker = "charset=";

    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    int markerIndex = text.IndexOf(Marker, StringComparison.OrdinalIgnoreCase);
    if (markerIndex == -1)
    {
        return string.Empty;
    }

    // Skip whitespace and an optional opening quote after the '='.
    int start = markerIndex + Marker.Length;
    while (start < text.Length
        && (text[start] == '"' || text[start] == '\'' || char.IsWhiteSpace(text[start])))
    {
        start++;
    }

    // Consume the charset name itself; it ends at the first character that cannot
    // be part of a name (closing quote, ';', '>', whitespace, ...).
    int end = start;
    while (end < text.Length)
    {
        char c = text[end];
        bool isNameChar = char.IsLetterOrDigit(c)
            || c == '-' || c == '_' || c == '.' || c == ':' || c == '+';
        if (!isNameChar)
        {
            break;
        }
        end++;
    }

    return text.Substring(start, end - start);
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: