您的位置:首页 > 编程语言 > ASP

ASP.NET中过滤HTML字符串的两个方法

2010-12-23 17:04 531 查看
/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded and trimmed.
/// </summary>
/// <remarks>
/// Processing order: script elements are removed together with their content, then all
/// remaining tags and comment fragments, then a small set of character entities is decoded,
/// any leftover angle brackets and CRLF pairs are dropped, and finally the text is
/// HTML-encoded so it can be written into a page as plain text.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with markup removed, HTML-encoded and trimmed; an empty string when
/// <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string GetNoHTMLString(string Htmlstring)
{
    // Regex.Replace throws on null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script elements including their content. Singleline lets '.' match
    // line breaks, so scripts spanning several lines are removed as well.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Collapse a line break followed by further whitespace.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove comment terminators and unterminated comment openers (up to end of line).
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode a fixed set of named/numeric character entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

    // Drop any other numeric character reference.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result must be assigned back,
    // otherwise these calls have no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // HttpUtility.HtmlEncode does not depend on HttpContext.Current, so the method
    // also works outside of an active request (background work, unit tests).
    Htmlstring = HttpUtility.HtmlEncode(Htmlstring).Trim();

    return Htmlstring;
}

/// <summary>
/// Returns a string for display that may keep ordinary HTML tags but has a fixed list of
/// dangerous elements (iframe, script, object, style, and similar) filtered out.
/// </summary>
/// <remarks>
/// For each listed element, a complete opening/closing pair is removed together with its
/// content; any remaining unpaired opening or closing tag of that element is removed on its
/// own. The filter is re-applied until the text stops changing, so fragments that only become
/// a tag after an inner removal are caught too.
/// NOTE(review): this is a tag blacklist only. It does not remove event-handler attributes
/// (e.g. onclick) or script-scheme URLs, so it is not a complete XSS defence by itself.
/// </remarks>
/// <param name="str">Unprocessed input string.</param>
/// <returns>
/// The input with the listed elements removed; an empty string when
/// <paramref name="str"/> is null or empty.
/// </returns>
public static string GetSafeHTMLString(string str)
{
    // Regex.Replace throws on null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    // Same elements, in the same order, as the original one-statement-per-tag filter.
    string[] dangerousTags =
    {
        "applet", "body", "embed", "frame", "script", "frameset", "html",
        "iframe", "style", "layer", "link", "ilayer", "meta", "object"
    };

    // Singleline lets '.' match line breaks so elements spanning several lines are caught.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    string previous;
    do
    {
        previous = str;

        foreach (string tag in dangerousTags)
        {
            // Complete element: opening tag, content, closing tag.
            // \b keeps e.g. "frame" from matching the start of "frameset".
            str = Regex.Replace(str, "<" + tag + @"\b[^>]*>.*?</" + tag + @"\s*>", "", options);

            // Void or unclosed element (e.g. link, meta) and orphaned closing tags.
            str = Regex.Replace(str, "</?" + tag + @"\b[^>]*>", "", options);
        }

        // Each replacement only shortens the text, so this loop terminates.
    }
    while (previous != str);

    return str;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: