您的位置:首页 > Web前端 > HTML

自动识别HTML代码里的图片链接,并将图片下载到服务器指定目录的源码

2007-11-06 12:34 906 查看
CODE:

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
namespace zhang.Common
{
/// <summary>
/// Finds absolute image URLs inside the <c>&lt;img&gt;</c> tags of an HTML fragment,
/// downloads each image into a server directory and rewrites the HTML so that it
/// points at the downloaded copies.
/// </summary>
public class HanlerFiles
{
    // Built once and shared: RegexOptions.Compiled is expensive to construct,
    // so re-creating these on every call would waste the compilation work.
    private static readonly Regex ImgTagRegex =
        new Regex("<img.+?>", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    // The URL stops at whitespace, quotes and angle brackets so that it cannot run
    // past the end of the attribute value; the dot before the extension is escaped.
    private static readonly Regex ImgUrlRegex =
        new Regex("https?://[^\\s\"'<>]+\\.(?:jpg|gif|bmp|png)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Returns one entry per <c>&lt;img&gt;</c> tag found in <paramref name="htmlStr"/>.
    /// Each entry is the absolute image URL of that tag, or an empty string when the
    /// tag contains no absolute http(s) URL with a supported extension.
    /// </summary>
    private string[] GetImgTag(string htmlStr)
    {
        // Run the regex once and reuse the result for both sizing and iteration.
        MatchCollection tags = ImgTagRegex.Matches(htmlStr);
        string[] urls = new string[tags.Count];
        for (int i = 0; i < tags.Count; i++)
        {
            urls[i] = GetImgUrl(tags[i].Value);
        }
        return urls;
    }

    /// <summary>
    /// Extracts the first absolute http(s) image URL (jpg, gif, bmp or png) from a
    /// single <c>&lt;img&gt;</c> tag; returns an empty string when there is none.
    /// </summary>
    private string GetImgUrl(string imgTagStr)
    {
        Match urlMatch = ImgUrlRegex.Match(imgTagStr);
        return urlMatch.Success ? urlMatch.Value : "";
    }

    /// <summary>
    /// Downloads <paramref name="url"/> to <paramref name="targetFile"/>.
    /// Returns false instead of throwing when the download fails, so that one
    /// unreachable image does not abort the remaining ones.
    /// </summary>
    private static bool TryDownload(WebClient client, string url, string targetFile)
    {
        try
        {
            client.DownloadFile(url, targetFile);
            return true;
        }
        catch (WebException)
        {
            // WebClient reports both network errors and local write errors as WebException.
            return false;
        }
    }

    /// <summary>
    /// Detects the image URLs referenced by the HTML content, downloads the images
    /// into the given server directory and replaces each downloaded URL in the HTML
    /// with the path of the local copy.
    /// </summary>
    /// <param name="strHTML">
    /// HTML to scan. On return, every URL whose image was downloaded successfully
    /// has been replaced by <paramref name="path"/> plus the generated file name;
    /// URLs whose download failed are left untouched.
    /// </param>
    /// <param name="path">Virtual path of the target directory (resolved with Server.MapPath).</param>
    /// <returns>
    /// The number of <c>&lt;img&gt;</c> tags found in the HTML, regardless of how many
    /// images were actually downloaded.
    /// </returns>
    public int SaveUrlPics(ref string strHTML, string path)
    {
        if (string.IsNullOrEmpty(strHTML))
        {
            return 0;
        }

        string[] imgurlAry = GetImgTag(strHTML);

        // Without a current request (or a target path) nothing can be stored.
        // Stay best-effort and only report the number of tags found.
        HttpContext context = HttpContext.Current;
        if (imgurlAry.Length == 0 || context == null || path == null)
        {
            return imgurlAry.Length;
        }

        string targetDir;
        try
        {
            targetDir = context.Server.MapPath(path);
        }
        catch (HttpException)
        {
            // The virtual path cannot be mapped, so there is nowhere to save files.
            return imgurlAry.Length;
        }

        // The link written into the HTML must address the same file that was saved,
        // so make sure the directory and the file name are separated by a slash.
        string webPrefix = path.EndsWith("/", StringComparison.Ordinal) ? path : path + "/";

        List<string> handledUrls = new List<string>();
        using (WebClient wc = new WebClient())
        {
            for (int i = 0; i < imgurlAry.Length; i++)
            {
                string url = imgurlAry[i];

                // Skip tags without an absolute URL, and URLs that were already
                // processed (Replace below rewrites every occurrence at once).
                if (url.Length == 0 || handledUrls.Contains(url))
                {
                    continue;
                }
                handledUrls.Add(url);

                // A fixed, culture-invariant timestamp format keeps characters such
                // as '/' or ':' out of the generated file name.
                string fileName = DateTime.Now.ToString("yyyyMMddHHmmss", CultureInfo.InvariantCulture)
                    + "_" + url.Substring(url.LastIndexOf('/') + 1);

                if (!TryDownload(wc, url, Path.Combine(targetDir, fileName)))
                {
                    continue;
                }

                strHTML = strHTML.Replace(url, webPrefix + fileName);
            }
        }

        return imgurlAry.Length;
    }
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐