您的位置:首页 > Web前端 > Node.js

将Html源码解析成IHTMLDocument对象,然后使用DOMNode将html显示成一棵树

2008-05-13 12:29 429 查看

function StorePage(){d=document;t=d.selection?(d.selection.type!='None'?d.selection.createRange().text:''):(d.getSelection?d.getSelection():'');void(keyit=window.open('http://www.365key.com/storeit.aspx?t='+escape(d.title)+'&u='+escape(d.location.href)+'&c='+escape(t),'keyit','scrollbars=no,width=475,height=575,left=75,top=20,status=no,resizable=yes'));keyit.focus();}
功能:

将Html源码解析成IHTMLDocument2对象,然后将IHTMLDocument2转换成IHTMLDocument3,使用DOMNode将html显示成一棵树。此解析不执行任何脚本,不从网上下载任何资料,是一个纯文本的解析。

(方法 Parse(string s) 是一个轻量级的 HTML 解析(Parsing)实现:这段代码不会从网上下载任何资料,也不会执行任何脚本,只做纯粹的解析。
解析是通过 MSHTML 的 Markup Services 实现的。要正确使用这段代码,需要添加对 MSHTML 的引用。)

要正确编译如下代码,还需要修改unsafe(启用不安全模式)编译器选项,将其开启。

方法:在“项目”->“<应用程序名称>属性”对话框中打开“配置属性”,选中“生成”项,修改“允许不安全代码块”的内容为true.

[C#]

using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using mshtml;
using System.Runtime.InteropServices;
using System.IO;

namespace WindowsApplication1
{

/// <summary>
/// Managed import of the COM <c>IPersistStreamInit</c> interface
/// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713), declared as an IUnknown-based interface.
/// </summary>
/// <remarks>
/// This is a <c>ComImport</c> interface, so the members must stay in this exact order:
/// calls are dispatched by slot position, not by name.
/// </remarks>
[ComVisible(true), ComImport(), Guid("7FD52380-4E07-101B-AE2D-08002B2EC713"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
public interface IPersistStreamInit
{
    /// <summary>Receives the class identifier of the object through <paramref name="pClassID"/>.</summary>
    void GetClassID([In, Out] ref Guid pClassID);

    /// <summary>
    /// Returns the raw HRESULT of the native call (<c>PreserveSig</c>), so a non-success
    /// code is handed back to the caller instead of being turned into an exception.
    /// </summary>
    [return: MarshalAs(UnmanagedType.I4)]
    [PreserveSig]
    int IsDirty();

    /// <summary>Initializes the object from the given stream.</summary>
    void Load([In, MarshalAs(UnmanagedType.Interface)] UCOMIStream pstm);

    /// <summary>Saves the object to the given stream.</summary>
    /// <param name="pstm">Destination stream.</param>
    /// <param name="fClearDirty">Native BOOL passed as a 32-bit integer.</param>
    void Save([In, MarshalAs(UnmanagedType.Interface)] UCOMIStream pstm,
        [In, MarshalAs(UnmanagedType.I4)] int fClearDirty);

    /// <summary>Receives the maximum size, in bytes, of the stream needed to save the object.</summary>
    /// <param name="pcbSize">
    /// Written by the callee. The native parameter is a pointer to a 64-bit integer, so it
    /// is declared <c>out long</c>; the previous by-value <c>long</c> marked as
    /// <c>UnmanagedType.LPArray</c> was not a valid marshalling combination.
    /// </param>
    void GetSizeMax(out long pcbSize);

    /// <summary>Puts a newly created object into its initialized state.</summary>
    void InitNew();
}
/// <summary>
/// Form1 的摘要说明。
/// </summary>
public class Form1 : System.Windows.Forms.Form
{
// Button whose Click event is wired to button1_Click in InitializeComponent.
private System.Windows.Forms.Button button1;
// Tree view that button1_Click fills with the nodes of the parsed document.
private System.Windows.Forms.TreeView treeView1;
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.Container components = null;

/// <summary>
/// Creates the form and builds its child controls.
/// </summary>
public Form1()
{
    // Required for Windows Forms designer support.
    this.InitializeComponent();

    // TODO: add any further constructor code after the InitializeComponent call.
}

/// <summary>
/// Cleans up any resources being used.
/// </summary>
/// <param name="disposing">
/// When true, the designer component container (if one was created) is disposed
/// before the base class cleanup runs.
/// </param>
protected override void Dispose(bool disposing)
{
    bool releaseComponents = disposing && components != null;
    if (releaseComponents)
    {
        components.Dispose();
    }

    base.Dispose(disposing);
}

#region Windows 窗体设计器生成的代码
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// Creates button1 and treeView1, sets their layout properties, hooks
/// button1.Click to button1_Click and adds both controls to the form.
/// </summary>
private void InitializeComponent()
{
this.button1 = new System.Windows.Forms.Button();
this.treeView1 = new System.Windows.Forms.TreeView();
this.SuspendLayout();
//
// button1
//
this.button1.Location = new System.Drawing.Point(24, 16);
this.button1.Name = "button1";
this.button1.Size = new System.Drawing.Size(88, 24);
this.button1.TabIndex = 0;
this.button1.Text = "button1";
this.button1.Click += new System.EventHandler(this.button1_Click);
//
// treeView1
//
this.treeView1.ImageIndex = -1;
this.treeView1.Location = new System.Drawing.Point(280, 96);
this.treeView1.Name = "treeView1";
this.treeView1.SelectedImageIndex = -1;
this.treeView1.Size = new System.Drawing.Size(288, 224);
this.treeView1.TabIndex = 1;
//
// Form1
//
this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
this.ClientSize = new System.Drawing.Size(664, 333);
this.Controls.Add(this.treeView1);
this.Controls.Add(this.button1);
this.Name = "Form1";
this.Text = "Form1";
this.ResumeLayout(false);

}
#endregion

/// <summary>
/// The main entry point for the application: creates the form and runs the
/// message loop on it.
/// </summary>
[STAThread]
static void Main()
{
    Form1 mainForm = new Form1();
    Application.Run(mainForm);
}
/// <summary>
/// Parses HTML source text into an MSHTML document by feeding the string to
/// <c>IMarkupServices.ParseString</c> on a freshly created <c>HTMLDocumentClass</c>.
/// </summary>
/// <param name="s">The HTML source text to parse.</param>
/// <returns>
/// The resulting markup container viewed as <c>IHTMLDocument2</c>, or null when the
/// document does not expose the required interfaces or no container was produced.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
/// <remarks>
/// NOTE(review): the accompanying article states that this path neither downloads
/// resources nor runs scripts; that is a property of MSHTML and is not verified here.
/// Requires compiling with unsafe code enabled.
/// </remarks>
unsafe IHTMLDocument2 Parse(string s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    IHTMLDocument2 pDocument = new HTMLDocumentClass();

    // Put the new document into its initialized state before asking it to parse.
    IPersistStreamInit pPersist = pDocument as IPersistStreamInit;
    if (pPersist == null)
    {
        return null;
    }
    pPersist.InitNew();

    IMarkupServices ms = pDocument as IMarkupServices;
    if (ms == null)
    {
        return null;
    }

    IMarkupContainer pMC = null;
    IMarkupPointer pStart, pEnd;
    ms.CreateMarkupPointer(out pStart);
    ms.CreateMarkupPointer(out pEnd);

    // ParseString takes a pointer to the first UTF-16 code unit, so the managed
    // string is copied into an unmanaged, null-terminated buffer.
    IntPtr pSource = Marshal.StringToHGlobalUni(s);
    try
    {
        ms.ParseString(ref *(ushort*)pSource.ToPointer(), 0, out pMC, pStart, pEnd);
    }
    finally
    {
        // The buffer came from StringToHGlobalUni, so it must be returned with
        // FreeHGlobal. Marshal.Release is for COM interface pointers only.
        Marshal.FreeHGlobal(pSource);
    }

    // "as" yields null when no container was produced.
    return pMC as IHTMLDocument2;
}

/// <summary>
/// Click handler for button1: reads a fixed HTML file from disk, parses it with
/// <c>Parse</c>, writes the style sheet count to the console and rebuilds
/// treeView1 from the parsed document's DOM.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, System.EventArgs e)
{
    string filename = "D://NetC#Program//html//163.htm";
    if (!File.Exists(filename))
    {
        Console.WriteLine("文件不存在");
        return;
    }

    // "using" closes the reader (and the underlying file stream) even if reading
    // throws; a single ReadToEnd already returns the whole file.
    string html;
    using (StreamReader reader = new StreamReader(File.OpenRead(filename), System.Text.Encoding.Default))
    {
        html = reader.ReadToEnd();
    }

    IHTMLDocument2 doc2 = Parse(html);
    if (doc2 == null)
    {
        // Parse returns null when it could not produce a document.
        Console.WriteLine("解析失败");
        return;
    }

    Console.WriteLine(doc2.styleSheets.length);

    IHTMLDocument3 doc3 = (IHTMLDocument3)doc2;
    IHTMLDOMNode rootDomNode = (IHTMLDOMNode)doc3.documentElement;

    // Suspend repainting while the tree is rebuilt and start from an empty tree,
    // so repeated clicks do not append a second copy of the document.
    treeView1.BeginUpdate();
    try
    {
        treeView1.Nodes.Clear();
        TreeNode root = treeView1.Nodes.Add("HTML");
        if (rootDomNode != null)
        {
            InsertDOMNodes(rootDomNode, root);
        }
    }
    finally
    {
        treeView1.EndUpdate();
    }
}
/// <summary>
/// Recursively mirrors the children of a DOM node into a tree view node.
/// Nodes named "#text" become leaf entries showing their trimmed value (empty
/// text is skipped); every other node becomes an entry labelled with its node
/// name and is descended into.
/// </summary>
/// <param name="parentnode">DOM node whose children are added; null is treated as having no children.</param>
/// <param name="tree_node">Tree node that receives one child entry per added DOM child.</param>
private void InsertDOMNodes(IHTMLDOMNode parentnode, TreeNode tree_node)
{
    if (parentnode == null || !parentnode.hasChildNodes())
    {
        return;
    }

    IHTMLDOMChildrenCollection allchild = (IHTMLDOMChildrenCollection)parentnode.childNodes;
    int length = allchild.length;
    for (int i = 0; i < length; i++)
    {
        IHTMLDOMNode child_node = (IHTMLDOMNode)allchild.item(i);
        if (child_node == null)
        {
            continue;
        }

        // Read each COM property once; only text nodes need their value.
        string nodeName = child_node.nodeName;
        if (nodeName == "#text")
        {
            object rawValue = child_node.nodeValue;
            string text = (rawValue != null) ? rawValue.ToString().Trim() : "";
            if (text.Length > 0)
            {
                tree_node.Nodes.Add(text);
            }
        }
        else
        {
            TreeNode elementNode = tree_node.Nodes.Add(nodeName);
            InsertDOMNodes(child_node, elementNode);
        }
    }
}
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐