您的位置:首页 > 其它

获取网页中的图片并显示出来

2010-02-24 11:08 495 查看
此类实现以下功能:

1、读取网页内容

2、读取网页中的图片,并取出图片的路径

3、剔除重复的图片,并显示。

类代码为:

/// <summary>
/// Downloads a web page, extracts the <c>src</c> value of every <c>&lt;img&gt;</c> tag,
/// resolves each value to an absolute http/https URL, removes duplicates and renders
/// the result as an HTML fragment.
/// </summary>
/// <remarks>
/// The constructor performs the (blocking) HTTP download, so any exception raised by
/// <see cref="WebRequest"/> propagates to the caller.
/// </remarks>
public class img
{
    /// <summary>Address of the fetched page; always carries an explicit http/https scheme.</summary>
    public string url = "";

    // Matches an <img ...> tag and captures the optionally quoted src value in "imgUrl".
    // The trailing "/?" is a literal slash for self-closing tags.
    private static readonly Regex ImgTagRegex = new Regex(
        @"<img\b[^<>]*?\bsrc\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Captures everything after the first host label (e.g. "example.com" in "www.example.com").
    private static readonly Regex DomainRegex = new Regex(
        @"https?://[^/.]*\.(?<domain>[^/]*)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // All <img> tag matches found in the downloaded page.
    MatchCollection matches = null;

    /// <summary>
    /// Fetches the page at <paramref name="URL"/> and collects its <c>&lt;img&gt;</c> tags.
    /// </summary>
    /// <param name="URL">Page address; "http://" is prepended when no http/https scheme is present.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="URL"/> is null.</exception>
    public img(string URL)
    {
        if (URL == null)
        {
            throw new System.ArgumentNullException("URL");
        }

        // Only look at the start of the string: "http://" inside a query string
        // must not suppress the prefix, and "https://" must not get one.
        url = HasHttpScheme(URL) ? URL : "http://" + URL;

        matches = ImgTagRegex.Matches(DownloadHtml(url));
    }

    /// <summary>
    /// Renders the page address, the number of distinct <c>src</c> values found, one
    /// <c>&lt;img&gt;</c> tag per distinct resolved image URL, and the number of images shown.
    /// </summary>
    /// <returns>An HTML fragment; contains a "not found" message when the page has no usable images.</returns>
    public string getList()
    {
        // The address and the image URLs originate outside this class, so they are
        // HTML-encoded before being placed into markup.
        StringBuilder sb = new StringBuilder(WebUtility.HtmlEncode(url));

        ArrayList al = cutCF(matches, "imgUrl");
        if (al.Count == 0)
        {
            sb.Append("没找到图片!");
            return sb.ToString();
        }

        sb.Append("<br> 获取图片数目:" + al.Count + " 个</br>");

        // Different raw src values can resolve to the same absolute URL, so
        // de-duplicate again after resolution (not just adjacent entries).
        System.Collections.Generic.HashSet<string> shown =
            new System.Collections.Generic.HashSet<string>();
        int count = 0;
        foreach (string mc in al)
        {
            // Attribute values may contain entities such as &amp; - decode before resolving.
            string src = WebUtility.HtmlDecode(mc);

            string ximg = getImgUrl(src);
            if (ximg == "")
            {
                ximg = getImgUrl(url, src);
            }

            if (ximg == "" || !shown.Add(ximg))
            {
                continue;
            }

            count++;
            sb.Append("<img src=\"" + WebUtility.HtmlEncode(ximg) + "\"/><br>");
        }

        sb.Append("<br>显示出图片数目:" + count + " 个</br>");
        return sb.ToString();
    }

    /// <summary>
    /// Extracts the domain part of a page address: everything after the first host label
    /// and before the first path slash.
    /// </summary>
    /// <param name="strHtmlPagePath">Absolute http/https page address.</param>
    /// <returns>
    /// The domain (e.g. "example.com" for "http://www.example.com/a.html"),
    /// or an empty string when the address does not match.
    /// </returns>
    public string GetUrlDomainName(string strHtmlPagePath)
    {
        if (strHtmlPagePath == null)
        {
            return "";
        }

        return DomainRegex.Match(strHtmlPagePath).Groups["domain"].Value;
    }

    /// <summary>
    /// Returns the normalized absolute URL when <paramref name="imgStr"/> is already an
    /// absolute http/https address; returns an empty string for relative paths.
    /// </summary>
    /// <param name="imgStr">Raw <c>src</c> value.</param>
    /// <returns>The absolute URL, or an empty string.</returns>
    private string getImgUrl(string imgStr)
    {
        System.Uri absolute;
        if (System.Uri.TryCreate(imgStr, System.UriKind.Absolute, out absolute) && IsHttp(absolute))
        {
            return absolute.AbsoluteUri;
        }

        return "";
    }

    /// <summary>
    /// Resolves an image path against the address of the page that references it,
    /// handling root-relative ("/a.png"), page-relative ("a.png") and parent ("../a.png") forms.
    /// </summary>
    /// <param name="url">Absolute address of the page.</param>
    /// <param name="imgstr">Image path as written in the page.</param>
    /// <returns>The absolute http/https URL, or an empty string when it cannot be resolved.</returns>
    private string getImgUrl(string url, string imgstr)
    {
        System.Uri pageUri;
        System.Uri resolved;
        if (System.Uri.TryCreate(url, System.UriKind.Absolute, out pageUri)
            && System.Uri.TryCreate(pageUri, imgstr, out resolved)
            && IsHttp(resolved))
        {
            return resolved.AbsoluteUri;
        }

        // Non-http results (data:, javascript:, file: ...) are not displayable image links here.
        return "";
    }

    /// <summary>
    /// Collects the distinct, non-empty values of a named group, in order of first appearance.
    /// </summary>
    /// <param name="mc">Regex matches to read; may be null.</param>
    /// <param name="value">Name of the capture group to collect.</param>
    /// <returns>A list of distinct strings; empty when there is nothing to collect.</returns>
    private ArrayList cutCF(MatchCollection mc, string value)
    {
        ArrayList al = new ArrayList();
        if (mc == null)
        {
            return al;
        }

        System.Collections.Generic.HashSet<string> seen =
            new System.Collections.Generic.HashSet<string>();
        foreach (Match m in mc)
        {
            string captured = m.Groups[value].Value;
            if (captured.Length > 0 && seen.Add(captured))
            {
                al.Add(captured);
            }
        }

        return al;
    }

    // Downloads the page body as text, releasing the response, stream and reader even on failure.
    private static string DownloadHtml(string address)
    {
        WebRequest request = WebRequest.Create(address);
        // NOTE(review): default credentials are offered to whatever host is requested;
        // kept for compatibility, but confirm this is intended for non-intranet addresses.
        request.Credentials = CredentialCache.DefaultCredentials;

        using (WebResponse response = request.GetResponse())
        using (Stream dataStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataStream, Encoding.Default))
        {
            // Encoding.Default is the machine's ANSI code page; image URLs are normally
            // ASCII so extraction is unaffected by the page's real charset.
            return reader.ReadToEnd();
        }
    }

    // True when the address starts with an http:// or https:// scheme (case-insensitive).
    private static bool HasHttpScheme(string address)
    {
        return address.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase)
            || address.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
    }

    // True when the URI uses the http or https scheme.
    private static bool IsHttp(System.Uri candidate)
    {
        return candidate.Scheme == System.Uri.UriSchemeHttp
            || candidate.Scheme == System.Uri.UriSchemeHttps;
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: