ASP.NET中过滤HTML字符串的两个方法
2010-12-23 17:04
531 查看
/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded.
/// </summary>
/// <remarks>
/// Processing order: script elements (including their bodies) are removed, then all
/// remaining tags and comment delimiters, then a fixed set of character entities is
/// decoded, any literal angle brackets and CR/LF pairs are removed, and finally the
/// result is HTML-encoded and trimmed.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with markup removed, HTML-encoded and trimmed; an empty string when
/// <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string GetNoHTMLString(string Htmlstring)
{
    // Regex.Replace throws on null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script elements together with their content. Singleline lets '.' cross
    // line breaks, otherwise the body of a multi-line script would survive as text.
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"<script[^>]*?>.*?</script>",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Remove a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove stray comment terminators and unterminated comment openers.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the supported entities in ONE pass so that already-escaped text such as
    // "&amp;lt;" becomes "&lt;" and is not decoded a second time into "<".
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        entity =>
        {
            switch (entity.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return " ";
                case "iexcl":
                case "#161":
                    return "\u00A1";
                case "cent":
                case "#162":
                    return "\u00A2";
                case "pound":
                case "#163":
                    return "\u00A3";
                case "copy":
                case "#169":
                    return "\u00A9";
                default:
                    // Any other numeric character reference is dropped.
                    return string.Empty;
            }
        },
        RegexOptions.IgnoreCase);

    // Strings are immutable: the result of Replace must be assigned or it is lost.
    Htmlstring = Htmlstring.Replace("<", "").Replace(">", "").Replace("\r\n", "");

    // HttpUtility.HtmlEncode is the static encoder and does not require an active
    // request, unlike HttpContext.Current which is null outside of one.
    return System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();
}
/// <summary>
/// Returns a string that may still contain HTML markup, with a fixed set of dangerous
/// elements (script, iframe, object, style, and similar) removed.
/// </summary>
/// <remarks>
/// Elements that have both an opening and a closing tag are removed together with
/// their content; opening or closing tags of the same elements that appear on their
/// own (for example <c>&lt;meta&gt;</c>, <c>&lt;link&gt;</c> or an unclosed
/// <c>&lt;iframe&gt;</c>) are removed as well.
/// NOTE(review): this is a tag blacklist. It does not remove event-handler attributes
/// (such as <c>onclick</c>) or <c>javascript:</c> URLs on otherwise allowed tags, so it
/// should not be the only defence when rendering untrusted input.
/// </remarks>
/// <param name="str">The unprocessed string.</param>
/// <returns>
/// The string with the listed elements removed; an empty string when
/// <paramref name="str"/> is null or empty.
/// </returns>
public static string GetSafeHTMLString(string str)
{
    // Regex.Replace throws on null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    // Singleline lets '.' cross line breaks so multi-line script/style blocks match.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    // "frameset" is listed before "frame" and "ilayer" before "layer" for readability;
    // the lookahead in tagTail is what actually prevents prefix matches.
    const string dangerousTags =
        "applet|body|embed|frameset|frame|script|html|iframe|style|ilayer|layer|link|meta|object";

    // The tag name must be followed by whitespace, '/' or '>', then the rest of the tag.
    const string tagTail = @"(?=[\s/>])[^>]*>";

    const string pairedPattern = @"<(" + dangerousTags + @")" + tagTail + @".*?</\1" + tagTail;
    const string orphanPattern = @"</?(?:" + dangerousTags + @")" + tagTail;

    // Repeat until nothing more is removed, so that a tag reassembled from the pieces
    // left around a removed element (e.g. "<scr<script></script>ipt>") is caught too.
    // Each pass only deletes text, so the loop always terminates.
    int lengthBefore;
    do
    {
        lengthBefore = str.Length;
        str = Regex.Replace(str, pairedPattern, "", options);
        str = Regex.Replace(str, orphanPattern, "", options);
    }
    while (str.Length < lengthBefore);

    return str;
}
/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded.
/// </summary>
/// <remarks>
/// Processing order: script elements (including their bodies) are removed, then all
/// remaining tags and comment delimiters, then a fixed set of character entities is
/// decoded, any literal angle brackets and CR/LF pairs are removed, and finally the
/// result is HTML-encoded and trimmed.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with markup removed, HTML-encoded and trimmed; an empty string when
/// <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string GetNoHTMLString(string Htmlstring)
{
    // Regex.Replace throws on null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script elements together with their content. Singleline lets '.' cross
    // line breaks, otherwise the body of a multi-line script would survive as text.
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"<script[^>]*?>.*?</script>",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Remove a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove stray comment terminators and unterminated comment openers.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the supported entities in ONE pass so that already-escaped text such as
    // "&amp;lt;" becomes "&lt;" and is not decoded a second time into "<".
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        entity =>
        {
            switch (entity.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return " ";
                case "iexcl":
                case "#161":
                    return "\u00A1";
                case "cent":
                case "#162":
                    return "\u00A2";
                case "pound":
                case "#163":
                    return "\u00A3";
                case "copy":
                case "#169":
                    return "\u00A9";
                default:
                    // Any other numeric character reference is dropped.
                    return string.Empty;
            }
        },
        RegexOptions.IgnoreCase);

    // Strings are immutable: the result of Replace must be assigned or it is lost.
    Htmlstring = Htmlstring.Replace("<", "").Replace(">", "").Replace("\r\n", "");

    // HttpUtility.HtmlEncode is the static encoder and does not require an active
    // request, unlike HttpContext.Current which is null outside of one.
    return System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();
}
/// <summary>
/// Returns a string that may still contain HTML markup, with a fixed set of dangerous
/// elements (script, iframe, object, style, and similar) removed.
/// </summary>
/// <remarks>
/// Elements that have both an opening and a closing tag are removed together with
/// their content; opening or closing tags of the same elements that appear on their
/// own (for example <c>&lt;meta&gt;</c>, <c>&lt;link&gt;</c> or an unclosed
/// <c>&lt;iframe&gt;</c>) are removed as well.
/// NOTE(review): this is a tag blacklist. It does not remove event-handler attributes
/// (such as <c>onclick</c>) or <c>javascript:</c> URLs on otherwise allowed tags, so it
/// should not be the only defence when rendering untrusted input.
/// </remarks>
/// <param name="str">The unprocessed string.</param>
/// <returns>
/// The string with the listed elements removed; an empty string when
/// <paramref name="str"/> is null or empty.
/// </returns>
public static string GetSafeHTMLString(string str)
{
    // Regex.Replace throws on null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    // Singleline lets '.' cross line breaks so multi-line script/style blocks match.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    // "frameset" is listed before "frame" and "ilayer" before "layer" for readability;
    // the lookahead in tagTail is what actually prevents prefix matches.
    const string dangerousTags =
        "applet|body|embed|frameset|frame|script|html|iframe|style|ilayer|layer|link|meta|object";

    // The tag name must be followed by whitespace, '/' or '>', then the rest of the tag.
    const string tagTail = @"(?=[\s/>])[^>]*>";

    const string pairedPattern = @"<(" + dangerousTags + @")" + tagTail + @".*?</\1" + tagTail;
    const string orphanPattern = @"</?(?:" + dangerousTags + @")" + tagTail;

    // Repeat until nothing more is removed, so that a tag reassembled from the pieces
    // left around a removed element (e.g. "<scr<script></script>ipt>") is caught too.
    // Each pass only deletes text, so the loop always terminates.
    int lengthBefore;
    do
    {
        lengthBefore = str.Length;
        str = Regex.Replace(str, pairedPattern, "", options);
        str = Regex.Replace(str, orphanPattern, "", options);
    }
    while (str.Length < lengthBefore);

    return str;
}
相关文章推荐
- ASP.NET中过滤HTML字符串的两个方法
- ASP.NET中过滤HTML字符串的两个方法
- ASP.NET中过滤HTML字符串的两个方法
- asp.net 截取带有html标签的字符串(先过滤html,再截取)的方法
- ASP.NET过滤HTML字符串方法总结
- ASP.NET过滤HTML字符串方法总结
- 在ASP.NET中过滤HTML字符串总结
- ASP.NET过滤HTML标签只保留换行与空格的方法
- Asp.Net 生成验证码,清空缓存,分页方法,生成图片水印,防注入过滤,页面过滤HTML,文字转图片
- ASP.NET技巧:两个截取字符串的实用方法
- ASP.NET过滤HTML标签只保留换行与空格的方法
- asp.net 常用字符串过滤方法
- asp.net 常用字符串过滤方法 <转>
- asp.net MVC中使用Html.Checkbox提示该字符串未被识别为有效的布尔值错误的解决方法
- ASP.NET技巧:两个截取字符串的实用方法
- ASP.NET两个截取字符串的方法分享
- ASP.NET过滤HTML标签只保留换行与空格的方法
- asp.net 过滤SQL非法字符串方法
- ASP.NET过滤HTML标签只保留换行与空格的方法
- ASP.NET两个截取字符串的方法分享