您的位置:首页 > 其它

利用 WebClient 实现下载并另存为txt 格式的文本文件

2010-08-07 18:08 288 查看
前几天看到同事在网上复制、粘贴管理方面的文章,一遍一遍地重复操作,这让我想到可不可以写一个程序来完成,于是上网查资料,终于帮他解决了,代码如下:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Text.RegularExpressions;
using System.IO;
using System.Text;

namespace WebUI
{
public partial class TestWebClient : System.Web.UI.Page
{
/// <summary>
/// Page load handler. It performs no work; the page only acts when the
/// download button is clicked.
/// </summary>
/// <param name="sender">Source of the event.</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Intentionally left empty.
}

/// <summary>
/// Click handler for the download button. For every page id in the
/// configured (inclusive) range it writes the page URL to the response,
/// hands the URL to <see cref="SetLog"/> using the id as the file name,
/// and then writes a line break.
/// </summary>
/// <param name="sender">Source of the event.</param>
/// <param name="e">Event data (unused).</param>
protected void btnDownLoad_Click(object sender, EventArgs e)
{
    // Inclusive id range; both bounds are currently the same, so one page is processed.
    const int firstPageId = 1507;
    const int lastPageId = 1507;

    int pageId = firstPageId;
    while (pageId <= lastPageId)
    {
        string pageUrl = "http://www.ccmcsz.com/management/" + pageId + ".htm";

        Response.Write(pageUrl);
        SetLog(pageUrl, pageId.ToString());
        Response.Write("<br/>");

        pageId++;
    }
}
/// <summary>
/// Downloads the resource at <paramref name="url"/>, reads it as gb2312 text,
/// strips the markup with <see cref="DelHTML"/> and appends the result to
/// the file "D:\Test163\{name}.txt" (the directory is created when missing).
/// </summary>
/// <remarks>
/// Failures do not propagate: the exception is recorded through
/// <see cref="System.Diagnostics.Trace"/> and the URL followed by a line break
/// is written to the response, so a batch of downloads keeps going.
/// The target file is opened in append mode, so calling this twice for the
/// same <paramref name="name"/> writes the text twice.
/// </remarks>
/// <param name="url">Address of the page to download.</param>
/// <param name="name">File name, without extension, for the saved text.</param>
public void SetLog(string url, string name)
{
    try
    {
        string filepath = @"D:\Test163\";
        Encoding defaultencode = Encoding.GetEncoding("gb2312");
        string NewFilePath = Path.Combine(filepath, name + ".txt");

        // CreateDirectory does nothing when the directory already exists,
        // so no separate existence check is needed.
        Directory.CreateDirectory(filepath);

        string message;
        // using-blocks release the client, the network stream and the reader
        // even when the download or the read throws.
        using (System.Net.WebClient wc = new System.Net.WebClient())
        using (Stream ss = wc.OpenRead(url))
        using (StreamReader rd = new StreamReader(ss, defaultencode))
        {
            message = rd.ReadToEnd();
        }

        message = DelHTML(message);

        // Disposing the writer flushes and closes the file, also on failure.
        using (StreamWriter sw = new StreamWriter(NewFilePath, true, defaultencode))
        {
            sw.Write(message);
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep the page alive, but do not lose the reason for the failure.
        System.Diagnostics.Trace.TraceError("SetLog failed for {0}: {1}", url, ex);
        this.Response.Write(url + "<br/>");
    }
}

/// <summary>
/// Reduces an HTML fragment to plain text: removes script blocks, tags,
/// comment remnants and stray angle brackets, then decodes a fixed set of
/// character entities.
/// </summary>
/// <remarks>
/// Line breaks that are followed by whitespace are removed together with
/// that whitespace. Entities are decoded in a single pass, so text such as
/// "&amp;amp;lt;" yields "&amp;lt;" rather than being decoded twice. Numeric
/// character references outside the supported set are dropped.
/// </remarks>
/// <param name="Htmlstring">Markup to clean; null is treated as empty.</param>
/// <returns>The cleaned text; an empty string for null or empty input.</returns>
public static string DelHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Drop each line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "");

    // Remove script blocks. Singleline lets '.' match line breaks, so scripts
    // spanning several lines are removed as a whole instead of leaking their source.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove the remaining HTML tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "");
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "");

    // Remove comment remnants left behind by the tag removal.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "");
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "");

    // Remove stray brackets and CR/LF pairs. Strings are immutable, so the
    // result of Replace must be assigned back. This runs before entity decoding
    // so that brackets written as &lt; / &gt; in the source text are preserved.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // Decode entities in one pass to avoid decoding the output of an earlier replacement.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);", DecodeHtmlEntity, RegexOptions.IgnoreCase);

    return Htmlstring;
}

// Maps one matched entity (name or "#number" in group 1) to its replacement text.
private static string DecodeHtmlEntity(Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            // Any other numeric character reference is removed.
            return string.Empty;
    }
}

}
}


等待更新...
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐