您的位置：首页 > 编程语言 > ASP

ASP.NET去除HTML标签并截取指定长度字符串

2017-05-31 10:56 801 查看

当我们使用富文本编辑器存储内容时，保存到数据库的内容可能含有 HTML 标签。将其查询出来展示在页面上时，有时需要对字符串进行截取。这时应先对其进行 HTML 解码，再去除 HTML 标签，最后截取指定长度的字符串。

/// <summary>
/// Strips HTML markup from a string and returns HTML-encoded plain text.
/// Script blocks, tags and comments are removed, a small set of character
/// entities is decoded, all remaining numeric entities are dropped, and
/// spaces, line breaks and a fixed set of punctuation characters are removed
/// before the result is HTML-encoded and trimmed.
/// </summary>
/// <param name="htmlStr">The (already HTML-decoded) markup to clean; may be null.</param>
/// <returns>The cleaned, HTML-encoded text, or an empty string when <paramref name="htmlStr"/> is null.</returns>
public static string NoHTML(string htmlStr)
{
    if (htmlStr == null)
    {
        return "";
    }

    // Remove script blocks. Singleline lets '.' match newlines so that scripts
    // spanning several lines are removed together with their bodies.
    htmlStr = Regex.Replace(htmlStr, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove tags.
    htmlStr = Regex.Replace(htmlStr, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Remove a line break together with the whitespace that follows it.
    htmlStr = Regex.Replace(htmlStr, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove leftover comment delimiters (an unterminated comment is dropped to end of line).
    htmlStr = Regex.Replace(htmlStr, @"-->", "", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode (or drop) the known character entities.
    htmlStr = Regex.Replace(htmlStr, @"&(quot|#34);", "", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(nbsp|#160);", "", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

    // Drop every other numeric character reference.
    htmlStr = Regex.Replace(htmlStr, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    htmlStr = Regex.Replace(htmlStr, "xp_cmdshell", "", RegexOptions.IgnoreCase);

    // NOTE(review): every space is removed, as in the original code. That suits
    // CJK summaries but glues Latin words together - confirm this is intended.
    htmlStr = htmlStr.Replace(" ", "");

    // Remove any remaining carriage returns and line feeds.
    htmlStr = htmlStr.Replace("\r", "").Replace("\n", "");

    // Remove special characters: < > * - ? , / ;
    htmlStr = Regex.Replace(htmlStr, @"[<>*\-?,/;]", "");

    // WebUtility does not need an active HTTP request, unlike HttpContext.Current.Server.
    return System.Net.WebUtility.HtmlEncode(htmlStr).Trim();
}

/// <summary>
/// Truncates a mixed Chinese/English string to a display width.
/// Characters in the range U+4E00..U+9FA5 count as two width units and every
/// other character counts as one, so to keep 20 Chinese characters pass a
/// <paramref name="length"/> of 40. When the text does not fit, the longest
/// prefix that fits is returned followed by "..".
/// </summary>
/// <param name="stringToSub">The text to truncate; null is treated as empty.</param>
/// <param name="length">The maximum width of the kept text, in width units.</param>
/// <returns>
/// The original text when it fits within <paramref name="length"/>; otherwise
/// the fitting prefix with ".." appended.
/// </returns>
public static string GetFirstString(string stringToSub, int length)
{
    if (string.IsNullOrEmpty(stringToSub))
    {
        return "";
    }

    StringBuilder sb = new StringBuilder();
    int usedWidth = 0;

    foreach (char c in stringToSub)
    {
        // A plain range comparison replaces the per-character regex match.
        int charWidth = (c >= '\u4e00' && c <= '\u9fa5') ? 2 : 1;

        // Check the budget before appending so the result never exceeds 'length'.
        if (usedWidth + charWidth > length)
        {
            return sb.ToString() + "..";
        }

        sb.Append(c);
        usedWidth += charWidth;
    }

    return sb.ToString();
}

要显示变量SNote的前20个字，调用方法如下：

GetFirstString(NoHTML(Server.HtmlDecode(SNote)), 40)

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： asp.net 去除html标签

相关文章推荐

新的分享

章节导航