您的位置:首页 > 编程语言 > ASP

ASP获取网页全部图片地址并保存为数组的正则

2013-05-16 17:51 531 查看
目前还是有BUG的,最新的测试页面在: http://www.reallydo.com/getimg.asp
正则分析页面在: http://jorkin.reallydo.com/article.asp?id=380
发现BUG请在后面留言,谢谢.

1.31修正

src=后面有空格不能正确匹配.已修正.

src=''为空时出错.已修正.

发现BUG: 图片路径中含有多个连续空格时,只会保留其中一个空格.未修正.

2.18修正

图片路径中含有多个连续空格时只会保留一个空格的BUG.已修正.

复制代码 代码如下:

<%

'Purpose: collect the src address of every <img> tag in an HTML string into an array.
'Source: http://jorkin.reallydo.com/article.asp?id=448
'Requires the ReplaceAll function: http://jorkin.reallydo.com/article.asp?id=406
'
'Parameters:
'  sString - HTML text to scan.
'Returns:
'  A zero-based array holding one src value per matched <img> tag, in match
'  order. The array is empty (UBound = -1) when sString is Null or when no
'  <img> tag with a src attribute is found.
Function getIMG(sString)
    Dim sReallyDo, regEx, iReallyDo
    Dim oMatches, cMatch

    '// Start with an empty array
    iReallyDo = -1
    ReDim aReallyDo(iReallyDo)

    '// Null input: hand back the empty array so every code path returns an
    '// array (callers can always use UBound / For Each on the result)
    If IsNull(sString) Then
        getIMG = aReallyDo
        Exit Function
    End If

    '// Normalize the HTML:
    '// put every <img on its own line so the patterns below (where "." does
    '// not cross a line break) work on one tag at a time
    sReallyDo = sString

    '// Best effort: a failure in any normalization step (for example when
    '// ReplaceAll is not available) is skipped and extraction continues
    On Error Resume Next
    sReallyDo = Replace(sReallyDo, vbCr, " ")
    sReallyDo = Replace(sReallyDo, vbLf, " ")
    sReallyDo = Replace(sReallyDo, vbTab, " ")
    sReallyDo = Replace(sReallyDo, "<img ", vbCrLf & "<img ", 1, -1, 1)
    sReallyDo = Replace(sReallyDo, "/>", " />", 1, -1, 1)
    '// ReplaceAll is defined outside this function; presumably it repeats the
    '// replacement until no "= " / "> " remains - confirm against its source
    sReallyDo = ReplaceAll(sReallyDo, "= ", "=", True)
    sReallyDo = ReplaceAll(sReallyDo, "> ", ">", True)
    sReallyDo = Replace(sReallyDo, "><", ">" & vbCrLf & "<")
    sReallyDo = Trim(sReallyDo)
    On Error GoTo 0

    Set regEx = New RegExp
    regEx.IgnoreCase = True
    regEx.Global = True

    '// Strip inline event handlers (onclick, onload, ...).
    '// The attribute name must literally start with "on"; the previous
    '// character class [on] matched a single "o" OR "n" and, followed by
    '// ".+?", could swallow unrelated attributes including src itself.
    regEx.Pattern = "\son\w+=([\""\'])(.*?)\1"
    sReallyDo = regEx.Replace(sReallyDo, "")

    '// Add quotes around src values that were written without them
    regEx.Pattern = "<img.*?\ssrc=([^\""\'\s][^\""\'\s>]*).*?>"
    sReallyDo = regEx.Replace(sReallyDo, "<img src=""$1"" />")

    '// Match the quoted src value of every <img> tag
    regEx.Pattern = "<img.*?\ssrc=([\""\'])([^\""\']+?)\1.*?>"
    Set oMatches = regEx.Execute(sReallyDo)

    '// Store each address in the array; SubMatches(1) is the second capture
    '// group (the URL between the quotes)
    For Each cMatch In oMatches
        iReallyDo = iReallyDo + 1
        ReDim Preserve aReallyDo(iReallyDo)
        aReallyDo(iReallyDo) = cMatch.SubMatches(1)
    Next

    getIMG = aReallyDo
End Function

%>
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: