处理截取含有 HTML 标签的文章时出现的问题
2010-10-21 15:09
344 查看
package people.util;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.jsp.tagext.TagSupport;
/**
 * Truncates an HTML fragment to a given visible-text width and appends closing tags for the
 * elements that the cut left open.
 *
 * <p>Width is measured in display columns: markup contributes nothing, a character reference
 * such as {@code &nbsp;} contributes one column, an ASCII character one column and any other
 * character two columns.
 */
public class CutHtml extends TagSupport {

    /** Scratch value read and overwritten by {@link #removeMatchHtmlTag()}. */
    private static String htmlMatch = "";

    /** An opening tag, its content, and the closing tag carrying the same name. */
    private static final Pattern MATCHED_PAIR =
            Pattern.compile("<([a-zA-Z]+)[^<>]*>(.*?)</\\1>");

    /** Any opening tag; group 1 is the element name. */
    private static final Pattern OPEN_TAG = Pattern.compile("<([a-zA-Z]+)[^<>]*>");

    /** The text that follows a tag, up to (and optionally including) the next {@code <}. */
    private static final Pattern TEXT_AFTER_TAG = Pattern.compile("(>)[^<>]*(<?)");

    /** Self-closed ({@code <x ... />}) forms of the listed elements. */
    private static final Pattern SELF_CLOSED_TAG = Pattern.compile(
            "<(AREA|BASE|BASEFONT|BODY|BR|COL|COLGROUP|DD|DT|FRAME|HEAD|HR|HTML|IMG|INPUT|ISINDEX|LI|LINK|META|OPTION|P|PARAM|TBODY|TD|TFOOT|TH|THEAD|TR|area|base|basefont|body|br|col|colgroup|dd|dt|frame|head|hr|html|img|input|isindex|li|link|meta|option|p|param|tbody|td|tfoot|th|thead|tr)[^<>]*/>");

    /** Elements that never take an end tag, so none must be generated for them. */
    private static final Pattern VOID_ELEMENT = Pattern.compile(
            "area|base|basefont|br|col|embed|frame|hr|img|input|isindex|link|meta|param|source|track|wbr",
            Pattern.CASE_INSENSITIVE);

    /** Upper bound on the characters between {@code &} and {@code ;} of a character reference. */
    private static final int MAX_ENTITY_BODY = 32;

    /**
     * Strips every matched open/close tag pair from the internal scratch value and returns
     * what is left.
     *
     * <p>Kept for source compatibility. {@link #subStringHTML(String, int, String)} stores its
     * last tag skeleton in the scratch value but no longer depends on it for its own result.
     *
     * @return the scratch value with all matched tag pairs removed
     */
    public static String removeMatchHtmlTag() {
        htmlMatch = stripMatchedPairs(htmlMatch);
        return htmlMatch;
    }

    /**
     * Cuts {@code param} once its visible text reaches {@code length} columns, appends
     * {@code endWith} if anything was cut off, and then closes the elements left open.
     *
     * @param param   HTML fragment to shorten; may be {@code null}
     * @param length  maximum visible width in columns
     * @param endWith marker appended after a cut (for example {@code "..."}); {@code null}
     *                is treated as an empty string
     * @return {@code null} when {@code length < 1} or {@code param} is {@code null};
     *         {@code param} itself when it has fewer than {@code length} characters;
     *         otherwise the shortened, re-balanced fragment
     */
    public static String subStringHTML(String param, int length, String endWith) {
        if (length < 1 || param == null) {
            return null;
        }
        // Shortcut on the raw character count (markup included), as callers have always seen.
        if (param.length() < length) {
            return param;
        }

        StringBuilder result = new StringBuilder();
        final int total = param.length();
        int width = 0;
        int pos = 0;
        boolean inTag = false;
        while (pos < total && width < length) {
            char ch = param.charAt(pos);
            if (inTag) {
                // Markup is copied verbatim and never counted; '&' in an attribute is not an entity.
                result.append(ch);
                if (ch == '>') {
                    inTag = false;
                }
                pos++;
            } else if (ch == '<') {
                inTag = true;
                result.append(ch);
                pos++;
            } else {
                int entityEnd = (ch == '&') ? entityEnd(param, pos) : -1;
                if (entityEnd > pos) {
                    // A complete character reference renders as a single column.
                    result.append(param, pos, entityEnd);
                    width++;
                    pos = entityEnd;
                } else {
                    result.append(ch);
                    // Non-ASCII characters count double, independent of the platform charset.
                    width += (ch > 0x7F) ? 2 : 1;
                    pos++;
                }
            }
        }

        // Only mark the text as shortened when input really was left behind.
        if (pos < total && endWith != null) {
            result.append(endWith);
        }

        // Reduce the output to its tags, drop self-closed tags and balanced pairs;
        // whatever opening tags remain are the ones the cut left unclosed.
        String skeleton = TEXT_AFTER_TAG.matcher(result.toString()).replaceAll("$1$2");
        skeleton = SELF_CLOSED_TAG.matcher(skeleton).replaceAll("");
        skeleton = stripMatchedPairs(skeleton);
        htmlMatch = skeleton;

        List<String> unclosed = new ArrayList<String>();
        Matcher open = OPEN_TAG.matcher(skeleton);
        while (open.find()) {
            String name = open.group(1);
            if (!VOID_ELEMENT.matcher(name).matches()) {
                unclosed.add(name);
            }
        }
        // Close innermost first.
        for (int i = unclosed.size() - 1; i >= 0; i--) {
            result.append("</").append(unclosed.get(i)).append('>');
        }
        return result.toString();
    }

    /** Removes matched open/close tag pairs repeatedly until none is left. */
    private static String stripMatchedPairs(String skeleton) {
        String current = skeleton;
        while (true) {
            // Every replacement drops at least one tag pair, so the text shrinks until stable.
            String next = MATCHED_PAIR.matcher(current).replaceAll("$2");
            if (next.equals(current)) {
                return current;
            }
            current = next;
        }
    }

    /**
     * Returns the index just past the character reference starting at {@code start}
     * (which must hold {@code &}), or -1 when no complete reference starts there.
     */
    private static int entityEnd(String text, int start) {
        int pos = start + 1;
        if (pos < text.length() && text.charAt(pos) == '#') {
            pos++;
        }
        int bodyStart = pos;
        while (pos < text.length() && pos - bodyStart < MAX_ENTITY_BODY
                && isAsciiLetterOrDigit(text.charAt(pos))) {
            pos++;
        }
        if (pos > bodyStart && pos < text.length() && text.charAt(pos) == ';') {
            return pos + 1;
        }
        return -1;
    }

    /** Tells whether {@code c} is one of 0-9, a-z or A-Z. */
    private static boolean isAsciiLetterOrDigit(char c) {
        return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }
}
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.jsp.tagext.TagSupport;
/**
 * Truncates an HTML fragment to a given visible-text width and appends closing tags for the
 * elements that the cut left open.
 *
 * <p>Width is measured in display columns: markup contributes nothing, a character reference
 * such as {@code &nbsp;} contributes one column, an ASCII character one column and any other
 * character two columns.
 */
public class CutHtml extends TagSupport {

    /** Scratch value read and overwritten by {@link #removeMatchHtmlTag()}. */
    private static String htmlMatch = "";

    /** An opening tag, its content, and the closing tag carrying the same name. */
    private static final Pattern MATCHED_PAIR =
            Pattern.compile("<([a-zA-Z]+)[^<>]*>(.*?)</\\1>");

    /** Any opening tag; group 1 is the element name. */
    private static final Pattern OPEN_TAG = Pattern.compile("<([a-zA-Z]+)[^<>]*>");

    /** The text that follows a tag, up to (and optionally including) the next {@code <}. */
    private static final Pattern TEXT_AFTER_TAG = Pattern.compile("(>)[^<>]*(<?)");

    /** Self-closed ({@code <x ... />}) forms of the listed elements. */
    private static final Pattern SELF_CLOSED_TAG = Pattern.compile(
            "<(AREA|BASE|BASEFONT|BODY|BR|COL|COLGROUP|DD|DT|FRAME|HEAD|HR|HTML|IMG|INPUT|ISINDEX|LI|LINK|META|OPTION|P|PARAM|TBODY|TD|TFOOT|TH|THEAD|TR|area|base|basefont|body|br|col|colgroup|dd|dt|frame|head|hr|html|img|input|isindex|li|link|meta|option|p|param|tbody|td|tfoot|th|thead|tr)[^<>]*/>");

    /** Elements that never take an end tag, so none must be generated for them. */
    private static final Pattern VOID_ELEMENT = Pattern.compile(
            "area|base|basefont|br|col|embed|frame|hr|img|input|isindex|link|meta|param|source|track|wbr",
            Pattern.CASE_INSENSITIVE);

    /** Upper bound on the characters between {@code &} and {@code ;} of a character reference. */
    private static final int MAX_ENTITY_BODY = 32;

    /**
     * Strips every matched open/close tag pair from the internal scratch value and returns
     * what is left.
     *
     * <p>Kept for source compatibility. {@link #subStringHTML(String, int, String)} stores its
     * last tag skeleton in the scratch value but no longer depends on it for its own result.
     *
     * @return the scratch value with all matched tag pairs removed
     */
    public static String removeMatchHtmlTag() {
        htmlMatch = stripMatchedPairs(htmlMatch);
        return htmlMatch;
    }

    /**
     * Cuts {@code param} once its visible text reaches {@code length} columns, appends
     * {@code endWith} if anything was cut off, and then closes the elements left open.
     *
     * @param param   HTML fragment to shorten; may be {@code null}
     * @param length  maximum visible width in columns
     * @param endWith marker appended after a cut (for example {@code "..."}); {@code null}
     *                is treated as an empty string
     * @return {@code null} when {@code length < 1} or {@code param} is {@code null};
     *         {@code param} itself when it has fewer than {@code length} characters;
     *         otherwise the shortened, re-balanced fragment
     */
    public static String subStringHTML(String param, int length, String endWith) {
        if (length < 1 || param == null) {
            return null;
        }
        // Shortcut on the raw character count (markup included), as callers have always seen.
        if (param.length() < length) {
            return param;
        }

        StringBuilder result = new StringBuilder();
        final int total = param.length();
        int width = 0;
        int pos = 0;
        boolean inTag = false;
        while (pos < total && width < length) {
            char ch = param.charAt(pos);
            if (inTag) {
                // Markup is copied verbatim and never counted; '&' in an attribute is not an entity.
                result.append(ch);
                if (ch == '>') {
                    inTag = false;
                }
                pos++;
            } else if (ch == '<') {
                inTag = true;
                result.append(ch);
                pos++;
            } else {
                int entityEnd = (ch == '&') ? entityEnd(param, pos) : -1;
                if (entityEnd > pos) {
                    // A complete character reference renders as a single column.
                    result.append(param, pos, entityEnd);
                    width++;
                    pos = entityEnd;
                } else {
                    result.append(ch);
                    // Non-ASCII characters count double, independent of the platform charset.
                    width += (ch > 0x7F) ? 2 : 1;
                    pos++;
                }
            }
        }

        // Only mark the text as shortened when input really was left behind.
        if (pos < total && endWith != null) {
            result.append(endWith);
        }

        // Reduce the output to its tags, drop self-closed tags and balanced pairs;
        // whatever opening tags remain are the ones the cut left unclosed.
        String skeleton = TEXT_AFTER_TAG.matcher(result.toString()).replaceAll("$1$2");
        skeleton = SELF_CLOSED_TAG.matcher(skeleton).replaceAll("");
        skeleton = stripMatchedPairs(skeleton);
        htmlMatch = skeleton;

        List<String> unclosed = new ArrayList<String>();
        Matcher open = OPEN_TAG.matcher(skeleton);
        while (open.find()) {
            String name = open.group(1);
            if (!VOID_ELEMENT.matcher(name).matches()) {
                unclosed.add(name);
            }
        }
        // Close innermost first.
        for (int i = unclosed.size() - 1; i >= 0; i--) {
            result.append("</").append(unclosed.get(i)).append('>');
        }
        return result.toString();
    }

    /** Removes matched open/close tag pairs repeatedly until none is left. */
    private static String stripMatchedPairs(String skeleton) {
        String current = skeleton;
        while (true) {
            // Every replacement drops at least one tag pair, so the text shrinks until stable.
            String next = MATCHED_PAIR.matcher(current).replaceAll("$2");
            if (next.equals(current)) {
                return current;
            }
            current = next;
        }
    }

    /**
     * Returns the index just past the character reference starting at {@code start}
     * (which must hold {@code &}), or -1 when no complete reference starts there.
     */
    private static int entityEnd(String text, int start) {
        int pos = start + 1;
        if (pos < text.length() && text.charAt(pos) == '#') {
            pos++;
        }
        int bodyStart = pos;
        while (pos < text.length() && pos - bodyStart < MAX_ENTITY_BODY
                && isAsciiLetterOrDigit(text.charAt(pos))) {
            pos++;
        }
        if (pos > bodyStart && pos < text.length() && text.charAt(pos) == ';') {
            return pos + 1;
        }
        return -1;
    }

    /** Tells whether {@code c} is one of 0-9, a-z or A-Z. */
    private static boolean isAsciiLetterOrDigit(char c) {
        return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }
}
相关文章推荐
- javascript处理HTML的Encode(转码)和Decode(解码)总结,避免js脚本注入问题
- HTML文章中截取摘要的问题
- PHP截取用UEditor编辑器保存在数据库的文章乱码和包含HTML标签的问题
- HTML文章中截取摘要的问题
- HTMLParser入门_01_网络爬虫的雏形_解析文章和处理文章中的图片
- LoadRunner HTTP脚本迭代处理的常见问题
- 转载一篇文章,这几天有相关问题要处理,学习一下先
- javascript处理后台传过来的html数据转码问题
- LoadRunner HTTP脚本迭代处理的常见问题
- IIS 错误:由于扩展配置问题而无法提供您请求的页面。如果该页面是脚本,请添加处理程序。如果应下载文件,请添加 MIME 映射。
- MVC4.0 sql脚本、跨站脚本(XSS)、跨站伪造请求(CSRF)三种常见安全问题处理
- innerHTML动态添加html代码和脚本兼容性问题处理方法
- svc 报“由于扩展配置问题而无法提供您请求的页面。如果该页面是脚本,请添加处理程序。如果应下载文件,请添加 MIME 映射。“的HTTP 错误 404.3 – Not Found
- java实现带HTML代码的文章摘要截取
- svc 报“由于扩展配置问题而无法提供您请求的页面。如果该页面是脚本,请添加处理程序。如果应下载文件,请添加 MIME 映射。“的HTTP 错误 404.3 – Not Found
- 第一次发博哈我写的处理文章图片大小问题
- 关于javascript脚本修改html页面,未加载完成前出现信息延迟问题
- jersey处理支付宝异步回调通知的问题:java.lang.IllegalArgumentException: Error parsing media type 'application/x-www-form-urlencoded; text/html; charset=UTF-8'
- 处理字符串的一些js/jq方法(去除HTML,去除空格,计算真实长度,截取中英文字符)
- Oracle 关闭session脚本,用于处理表数据被锁定问题