从一个HTML返回所有的图片链接
2009-10-29 08:17
302 查看
uses mshtml, ActiveX, COMObj, IdHTTP, idURI;
{ .... }
{ Downloads the page at AURL, loads the HTML into an in-memory MSHTML
  document and fills AList with the src of every IMG element found.

  AURL  - address of the page to scan.
  AList - receives one entry per image; it is cleared first.

  Any exception raised by the HTTP download or by the COM calls propagates
  to the caller. The procedure pumps the message queue
  (Application.ProcessMessages) while it waits for the document to become
  ready, so other event handlers may run during the call. }
procedure GetImageLinks(AURL: string; AList: TStrings);
var
  IDoc: IHTMLDocument2;
  strHTML: string;
  v: Variant;
  x: Integer;
  ImgCount: Integer;
  ovLinks: OleVariant;
  DocURL: string;
  URI: TidURI;
  ImgURL: string;
  idHTTP: TidHTTP;
begin
  AList.Clear;

  // Build the prefix that is put in front of non-absolute image URLs.
  URI := TidURI.Create(AURL);
  try
    // Keep the scheme of the requested page instead of forcing http, and
    // fall back to http only when the URL carries no scheme at all.
    if URI.Protocol <> '' then
      DocURL := URI.Protocol + '://' + URI.Host
    else
      DocURL := 'http://' + URI.Host;
    // Keep an explicit port, otherwise links on non-default ports break.
    if URI.Port <> '' then
      DocURL := DocURL + ':' + URI.Port;
    if URI.Path <> '/' then
      DocURL := DocURL + URI.Path;
  finally
    URI.Free;
  end;

  IDoc := CreateComObject(Class_HTMLDocument) as IHTMLDocument2;
  try
    // NOTE(review): design mode is switched on before the HTML is written,
    // presumably so that the page is parsed without being executed - confirm.
    IDoc.designMode := 'on';
    while IDoc.readyState <> 'complete' do
      Application.ProcessMessages;

    // IHTMLDocument2.write expects a SAFEARRAY of variants.
    v := VarArrayCreate([0, 0], VarVariant);
    idHTTP := TidHTTP.Create(nil);
    try
      strHTML := idHTTP.Get(AURL);
    finally
      idHTTP.Free;
    end;
    v[0] := strHTML;
    IDoc.Write(PSafeArray(System.TVarData(v).VArray));

    IDoc.designMode := 'off';
    while IDoc.readyState <> 'complete' do
      Application.ProcessMessages;

    ovLinks := IDoc.all.tags('IMG');
    // Read the late-bound collection size once; an empty collection simply
    // yields a loop that never runs.
    ImgCount := ovLinks.Length;
    for x := 0 to ImgCount - 1 do
    begin
      ImgURL := ovLinks.Item(x).src;

      // An IMG without a src yields an empty string; indexing ImgURL[1]
      // on it would be an out-of-range access, so skip such elements.
      if ImgURL = '' then
        Continue;

      // The stuff below will probably need a little tweaking.
      // Determining and turning relative URLs into absolute URLs
      // is not that difficult, but this is all I could come up with
      // on such short notice.
      if ImgURL[1] = '/' then
      begin
        // More than likely a relative URL, so prepend the DocURL.
        ImgURL := DocURL + ImgURL;
      end
      else if Copy(ImgURL, 1, 11) = 'about:blank' then
      begin
        // NOTE(review): presumably MSHTML resolved a relative src against
        // the blank in-memory document; swap that prefix for DocURL.
        ImgURL := DocURL + Copy(ImgURL, 12, Length(ImgURL));
      end;

      AList.Add(ImgURL);
    end;
  finally
    IDoc := nil;
  end;
end;
// Beispiel:
// Example:
// Click handler: fills Memo1 with the image links that GetImageLinks
// finds on the given page.
procedure TForm1.Button1Click(Sender: TObject);
begin
  GetImageLinks('http://www.swissdelphicenter.ch', Memo1.Lines);
end;
{ .... }
{ Downloads the page at AURL, loads the HTML into an in-memory MSHTML
  document and fills AList with the src of every IMG element found.

  AURL  - address of the page to scan.
  AList - receives one entry per image; it is cleared first.

  Any exception raised by the HTTP download or by the COM calls propagates
  to the caller. The procedure pumps the message queue
  (Application.ProcessMessages) while it waits for the document to become
  ready, so other event handlers may run during the call. }
procedure GetImageLinks(AURL: string; AList: TStrings);
var
  IDoc: IHTMLDocument2;
  strHTML: string;
  v: Variant;
  x: Integer;
  ImgCount: Integer;
  ovLinks: OleVariant;
  DocURL: string;
  URI: TidURI;
  ImgURL: string;
  idHTTP: TidHTTP;
begin
  AList.Clear;

  // Build the prefix that is put in front of non-absolute image URLs.
  URI := TidURI.Create(AURL);
  try
    // Keep the scheme of the requested page instead of forcing http, and
    // fall back to http only when the URL carries no scheme at all.
    if URI.Protocol <> '' then
      DocURL := URI.Protocol + '://' + URI.Host
    else
      DocURL := 'http://' + URI.Host;
    // Keep an explicit port, otherwise links on non-default ports break.
    if URI.Port <> '' then
      DocURL := DocURL + ':' + URI.Port;
    if URI.Path <> '/' then
      DocURL := DocURL + URI.Path;
  finally
    URI.Free;
  end;

  IDoc := CreateComObject(Class_HTMLDocument) as IHTMLDocument2;
  try
    // NOTE(review): design mode is switched on before the HTML is written,
    // presumably so that the page is parsed without being executed - confirm.
    IDoc.designMode := 'on';
    while IDoc.readyState <> 'complete' do
      Application.ProcessMessages;

    // IHTMLDocument2.write expects a SAFEARRAY of variants.
    v := VarArrayCreate([0, 0], VarVariant);
    idHTTP := TidHTTP.Create(nil);
    try
      strHTML := idHTTP.Get(AURL);
    finally
      idHTTP.Free;
    end;
    v[0] := strHTML;
    IDoc.Write(PSafeArray(System.TVarData(v).VArray));

    IDoc.designMode := 'off';
    while IDoc.readyState <> 'complete' do
      Application.ProcessMessages;

    ovLinks := IDoc.all.tags('IMG');
    // Read the late-bound collection size once; an empty collection simply
    // yields a loop that never runs.
    ImgCount := ovLinks.Length;
    for x := 0 to ImgCount - 1 do
    begin
      ImgURL := ovLinks.Item(x).src;

      // An IMG without a src yields an empty string; indexing ImgURL[1]
      // on it would be an out-of-range access, so skip such elements.
      if ImgURL = '' then
        Continue;

      // The stuff below will probably need a little tweaking.
      // Determining and turning relative URLs into absolute URLs
      // is not that difficult, but this is all I could come up with
      // on such short notice.
      if ImgURL[1] = '/' then
      begin
        // More than likely a relative URL, so prepend the DocURL.
        ImgURL := DocURL + ImgURL;
      end
      else if Copy(ImgURL, 1, 11) = 'about:blank' then
      begin
        // NOTE(review): presumably MSHTML resolved a relative src against
        // the blank in-memory document; swap that prefix for DocURL.
        ImgURL := DocURL + Copy(ImgURL, 12, Length(ImgURL));
      end;

      AList.Add(ImgURL);
    end;
  finally
    IDoc := nil;
  end;
end;
// Beispiel:
// Example:
// Click handler: fills Memo1 with the image links that GetImageLinks
// finds on the given page.
procedure TForm1.Button1Click(Sender: TObject);
begin
  GetImageLinks('http://www.swissdelphicenter.ch', Memo1.Lines);
end;
相关文章推荐
- 从一个HTML返回所有的图片链接
- iOS 获取html中的所有图片链接地址
- php代码从html代码中提取出所有的图片返回数组
- [导入][原创]使用正则表达式找出HTML代码内所有IMG图片的SRC链接地址
- iOS 获取html中的所有图片链接地址
- 用JS获取Html中所有图片文件流然后替换原有链接
- jsoup 提取 html 中的所有链接、图片和媒体
- HTML图片链接详解
- webview 获取html中所有的图片资源并给图片添加点击事件
- Swift截取HTML中的所有图片url
- android Intent.setType() 过滤图片,返回所有的文件类型
- 查询从一个表中返回所有记录不在另外一个表中的结果集的方法
- js获取html代码中所有图片地址
- MVC 自定义标签,给Html.ActionLink加上支持图片链接的功能
- jsoup 获得html页面所有的连接和图片,js,css等
- 抽取html中的所有链接
- C# 根据URL返回HTML_根据URL获取图片信息/缩略图
- asp.net正则表达式提取网页网址、标题、图片,滤所有HTML标签
- XZ_iOS之使用webView加载后台返回的一串html代码,HTML中的图片不显示
- Windows 10 IIS所有的html返回空白