您的位置:首页 > Web前端 > HTML

利用 COM 调用 IE 进行 HTML 解析

2009-08-23 22:04 274 查看
别的就不多说了,直接上代码。代码很简单:通过 COM 创建 MSHTML 的 HTMLDocument 对象,让 IE 引擎加载并解析页面;有不懂的地方请留言。

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.ComTypes;
using System.Text;
using System.Threading;
using mshtml;
10
namespace Eric.Utilities.Html
{
    /// <summary>
    /// Subset of the COM HRESULT status codes, used as the return type of the
    /// <c>[PreserveSig]</c> methods on <see cref="IPersistStreamInit"/>.
    /// Negative values are failures; zero and positive values are successes.
    /// </summary>
    public enum HRESULT
    {
        E_FAIL = -2147467259,        // 0x80004005
        E_INVALIDARG = -2147024809,  // 0x80070057
        E_NOINTERFACE = -2147467262, // 0x80004002
        E_NOTIMPL = -2147467263,     // 0x80004001
        E_UNEXPECTED = -2147418113,  // 0x8000FFFF
        S_FALSE = 1,
        S_OK = 0
    }

    /// <summary>
    /// Managed declaration of the COM <c>IPersist</c> interface
    /// (IID 0000010c-0000-0000-C000-000000000046).
    /// </summary>
    [ComImport, Guid("0000010c-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true)]
    public interface IPersist
    {
        /// <summary>Retrieves the class identifier (CLSID) of the object.</summary>
        /// <param name="pClassID">Receives the CLSID. Declared <c>ref</c> (not <c>out</c>) to keep existing callers compiling.</param>
        void GetClassID(ref Guid pClassID);
    }

    /// <summary>
    /// Managed declaration of the COM <c>IPersistStreamInit</c> interface
    /// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713). <see cref="HtmlParser"/>
    /// uses it only to call <see cref="InitNew"/> on a fresh MSHTML document.
    /// </summary>
    [ComImport, InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true), Guid("7FD52380-4E07-101B-AE2D-08002B2EC713")]
    public interface IPersistStreamInit : IPersist
    {
        // Re-declared with 'new' on purpose: the members of a ComImport interface define
        // its vtable layout in declaration order, and the base interface's slot has to be
        // repeated here so the methods below line up with the native interface.
        new void GetClassID(ref Guid pClassID);

        /// <summary>Returns the raw HRESULT reporting whether the object changed since it was last saved.</summary>
        [PreserveSig]
        int IsDirty();

        /// <summary>Initializes the object from the given stream.</summary>
        [PreserveSig]
        HRESULT Load(IStream pstm);

        /// <summary>Saves the object to the given stream.</summary>
        [PreserveSig]
        HRESULT Save(IStream pstm, [MarshalAs(UnmanagedType.Bool)] bool fClearDirty);

        /// <summary>Retrieves the size, in bytes, of the stream needed to save the object.</summary>
        [PreserveSig]
        HRESULT GetSizeMax([In, Out, MarshalAs(UnmanagedType.U8)] ref long pcbSize);

        /// <summary>Initializes the object to a default state.</summary>
        [PreserveSig]
        HRESULT InitNew();
    }

    /// <summary>
    /// Parses HTML by letting the MSHTML (Internet Explorer) engine load a document
    /// through COM and exposing the resulting DOM as <see cref="IHTMLDocument3"/>.
    /// </summary>
    public class HtmlParser
    {
        /// <summary>Value of <c>readyState</c> once the document has finished loading.</summary>
        private const string ReadyStateComplete = "complete";

        /// <summary>Delay between two <c>readyState</c> checks while waiting for the load.</summary>
        private const int PollIntervalMilliseconds = 100;

        /// <summary>
        /// Loads <paramref name="url"/> into a new MSHTML document and blocks, without
        /// a time limit, until the document reports that loading is complete.
        /// </summary>
        /// <param name="url">URL or local file path of the document to load.</param>
        /// <returns>The loaded document.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public IHTMLDocument3 Parse(string url)
        {
            return Parse(url, Timeout.Infinite);
        }

        /// <summary>
        /// Loads <paramref name="url"/> into a new MSHTML document and blocks until the
        /// document reports that loading is complete or the timeout elapses.
        /// </summary>
        /// <param name="url">URL or local file path of the document to load.</param>
        /// <param name="timeoutMilliseconds">
        /// Maximum time to wait, in milliseconds, or <see cref="Timeout.Infinite"/> to wait forever.
        /// </param>
        /// <returns>The loaded document.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="timeoutMilliseconds"/> is less than -1.</exception>
        /// <exception cref="TimeoutException">The document did not finish loading in time.</exception>
        /// <exception cref="InvalidOperationException">MSHTML did not return a document.</exception>
        public IHTMLDocument3 Parse(string url, int timeoutMilliseconds)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            if (timeoutMilliseconds < Timeout.Infinite)
            {
                throw new ArgumentOutOfRangeException("timeoutMilliseconds");
            }

            HTMLDocument host = new HTMLDocument();

            // Direct cast instead of 'as': an unsupported interface surfaces as an
            // InvalidCastException here rather than a NullReferenceException one line later.
            IPersistStreamInit persist = (IPersistStreamInit)host;

            // InitNew is [PreserveSig]; convert a failure HRESULT into an exception
            // instead of silently continuing with an uninitialized document.
            Marshal.ThrowExceptionForHR((int)persist.InitNew());

            IHTMLDocument2 document = host.createDocumentFromUrl(url, "null");
            if (document == null)
            {
                throw new InvalidOperationException("MSHTML did not return a document for the requested URL.");
            }

            // NOTE(review): loading is asynchronous inside MSHTML. This loop only sleeps and
            // polls; presumably the calling thread must not be an STA thread that is expected
            // to pump messages for the document - confirm against the hosting application.
            Stopwatch elapsed = Stopwatch.StartNew();
            while (!string.Equals(document.readyState, ReadyStateComplete, StringComparison.Ordinal))
            {
                if (timeoutMilliseconds != Timeout.Infinite && elapsed.ElapsedMilliseconds >= timeoutMilliseconds)
                {
                    throw new TimeoutException("The document did not finish loading within the allotted time.");
                }

                Thread.Sleep(PollIntervalMilliseconds);
            }

            return (IHTMLDocument3)document;
        }

        /// <summary>
        /// Parses an in-memory HTML string by writing it to a temporary file, loading
        /// that file with <see cref="Parse(string)"/>, and removing the file afterwards.
        /// </summary>
        /// <param name="html">HTML markup to parse.</param>
        /// <param name="encoding">Encoding used to write the markup to the temporary file.</param>
        /// <returns>The loaded document.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
        public IHTMLDocument3 ParseHtml(string html, Encoding encoding)
        {
            // Validate before creating the temp file so a bad argument cannot leak one.
            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }

            string tempPath = Path.GetTempFileName();
            try
            {
                File.WriteAllText(tempPath, html, encoding);
                return Parse(tempPath);
            }
            finally
            {
                // Parse returns only after the document is fully loaded, so the file is
                // no longer needed; without this, every call leaves a file behind.
                TryDelete(tempPath);
            }
        }

        /// <summary>Deletes <paramref name="path"/>, ignoring failures caused by the file still being in use or inaccessible.</summary>
        private static void TryDelete(string path)
        {
            try
            {
                File.Delete(path);
            }
            catch (IOException)
            {
                // Best effort: a locked temp file must not mask the parse result.
            }
            catch (UnauthorizedAccessException)
            {
                // Best effort: see above.
            }
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: