您的位置:首页 > 编程语言 > ASP

asp.net过滤HTML标签的几个函数

2009-01-05 22:31 627 查看
using System.Web;
using System.Text.RegularExpressions;

namespace XZSOFT.XZCRM.Common
{
/// <summary>
/// Helpers for stripping HTML markup from a string and for extracting the
/// first image URL from an HTML fragment.
/// </summary>
public static class HtmlHelper
{
    // Matches a complete <script> element. Singleline lets '.' cross line
    // breaks so multi-line scripts are removed as well.
    private const string ScriptPattern = @"<script[^>]*?>.*?</script>";

    // Matches an opening/closing tag and tolerates '>' inside quoted attribute
    // values. The alternatives inside the repeated group are mutually exclusive
    // on their first character, so the pattern cannot backtrack exponentially
    // on malformed input such as "<aaaaaaaaaaaaaaaa,".
    private const string TagPattern = @"</?!?[\w:]+(?:[^>""']|""[^""]*""|'[^']*')*>";

    // A line break followed by further whitespace.
    private const string LineBreakWhitespacePattern = @"([\r\n])[\s]+";

    // Entity pattern / replacement pairs, applied in order. Keeping each pair
    // on one row guarantees the two columns can never drift out of step.
    private static readonly string[,] EntityRules =
    {
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", "   " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        // Any other numeric entity is dropped.
        { @"&#(\d+);", "" }
    };

    // Finds the src attribute of an <img> tag; the value may be double-quoted,
    // single-quoted or unquoted. Built once because it is compiled.
    private static readonly Regex ImgSrcRegex = new Regex(
        @"<img\s+(?:[^>]*?\s)?src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Removes scripts, tags and comments from an HTML string, decodes a small
    /// set of entities, drops any remaining angle brackets and CR/LF pairs,
    /// then HTML-encodes and trims the result.
    /// </summary>
    /// <param name="htmlstring">HTML text to clean; may be null or empty.</param>
    /// <returns>HTML-encoded plain text, or an empty string for null/empty input.</returns>
    public static string NoHtml(string htmlstring)
    {
        if (string.IsNullOrEmpty(htmlstring))
        {
            return string.Empty;
        }

        // Remove scripts first so their bodies never reach the output.
        htmlstring = Regex.Replace(htmlstring, ScriptPattern, "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        // Remove tags.
        htmlstring = Regex.Replace(htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
        htmlstring = Regex.Replace(htmlstring, LineBreakWhitespacePattern, "", RegexOptions.IgnoreCase);
        // Remove comment leftovers the tag rule did not consume.
        htmlstring = Regex.Replace(htmlstring, @"-->", "", RegexOptions.IgnoreCase);
        htmlstring = Regex.Replace(htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

        htmlstring = DecodeEntities(htmlstring);
        htmlstring = RemoveBracketsAndLineBreaks(htmlstring);

        // The static HttpUtility encoder does not need a current HTTP request,
        // so the method also works outside a page (for example in a job or test).
        return HttpUtility.HtmlEncode(htmlstring).Trim();
    }

    /// <summary>
    /// Removes scripts, tags and comments from an HTML string, decodes a small
    /// set of entities and drops any remaining angle brackets and CR/LF pairs.
    /// Unlike <see cref="NoHtml"/>, the result is not HTML-encoded or trimmed.
    /// </summary>
    /// <param name="strHtml">HTML text to clean; may be null or empty.</param>
    /// <returns>Plain text, or an empty string for null/empty input.</returns>
    public static string StripHtml(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        string strOutput = strHtml;
        strOutput = Regex.Replace(strOutput, ScriptPattern, "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        strOutput = Regex.Replace(strOutput, TagPattern, "", RegexOptions.IgnoreCase);
        strOutput = Regex.Replace(strOutput, LineBreakWhitespacePattern, "", RegexOptions.IgnoreCase);

        strOutput = DecodeEntities(strOutput);

        // Turn the comment terminator into a line break so the next rule,
        // which stops at a newline, can remove the comment up to that point.
        strOutput = Regex.Replace(strOutput, @"-->", "\r\n", RegexOptions.IgnoreCase);
        strOutput = Regex.Replace(strOutput, @"<!--.*\n", "", RegexOptions.IgnoreCase);

        return RemoveBracketsAndLineBreaks(strOutput);
    }

    /// <summary>
    /// Removes every <c>&lt;...&gt;</c> tag from the string.
    /// </summary>
    /// <param name="htmlStr">HTML text; may be null or empty.</param>
    /// <returns>The text without tags, or an empty string for null/empty input.</returns>
    public static string ParseTags(string htmlStr)
    {
        if (string.IsNullOrEmpty(htmlStr))
        {
            return string.Empty;
        }

        return Regex.Replace(htmlStr, "<[^>]*>", "");
    }

    /// <summary>
    /// Returns the <c>src</c> value of the first <c>&lt;img&gt;</c> tag in the text.
    /// </summary>
    /// <param name="htmlStr">HTML text; may be null or empty.</param>
    /// <returns>
    /// The image URL with its original casing and without surrounding quotes,
    /// or an empty string when no image tag is found.
    /// </returns>
    public static string GetImgUrl(string htmlStr)
    {
        if (string.IsNullOrEmpty(htmlStr))
        {
            return string.Empty;
        }

        // Match case-insensitively instead of lower-casing the input, which
        // would corrupt case-sensitive URLs.
        Match m = ImgSrcRegex.Match(htmlStr);
        return m.Success ? m.Groups["url"].Value : string.Empty;
    }

    // Applies every EntityRules row in order, ignoring entity-name case.
    private static string DecodeEntities(string text)
    {
        for (int i = 0; i < EntityRules.GetLength(0); i++)
        {
            text = Regex.Replace(text, EntityRules[i, 0], EntityRules[i, 1], RegexOptions.IgnoreCase);
        }

        return text;
    }

    // Drops stray angle brackets and CR/LF pairs. Strings are immutable, so
    // the result of each Replace has to be kept.
    private static string RemoveBracketsAndLineBreaks(string text)
    {
        return text.Replace("<", "").Replace(">", "").Replace("\r\n", "");
    }
}
}


测试用例:

// Demo page handler: strips the markup from a sample HTML fragment and writes
// the resulting plain text to the response.
protected void Page_Load(object sender, EventArgs e)
{
    // Quotes inside the literal must be escaped with a backslash, and the
    // literal has to stay on a single line.
    string test = "例:<strong id=\"abc\">你好</strong><p><a href=\"http://www.xingzhu.net.cn/\" target=\"_blank\">星烛网</a></p><IMG title=星烛网 src=\"http://121.9.206.74/Gift/face/2.gif\" border=0>H<EM>i,星烛网</EM>";

    // The helper method is named NoHtml; C# identifiers are case-sensitive.
    Response.Write(HtmlHelper.NoHtml(test));
}

输出结果:

例:你好星烛网Hi,星烛网
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: