您的位置:首页 > Web前端 > JavaScript

jsoup-处理html中的script数据

2016-06-25 13:38 375 查看
/**
 * Scrapes the left-hand category data of the ValueOnline ("价值在线") laws page.
 *
 * <p>Example page: http://www.valueonline.cn/laws/laws?typeid=96219074211635284
 *
 * <p>The data is read from the JavaScript {@code var} declarations found in the first
 * {@code <script>} element of the page and turned into SQL {@code insert} statements.
 *
 * @author hwaggLee
 */
public class UtilsHtmValueonLineType {

    /**
     * Position of the market-type declaration after the script text has been split on
     * the literal {@code "var"}. Segment 2 holds the category (plate type) list; its
     * export into {@code n3b_vl_plate_type} was disabled in the original code and is
     * intentionally not performed here.
     */
    private static final int MARKET_TYPE_SEGMENT = 3;

    public static void main(String[] args) {
        String url = "http://www.valueonline.cn/laws/laws?typeid=96219074211635284";
        readHtml(url);
    }

    /**
     * Downloads {@code url}, extracts the market-type array from the first script element
     * and prints one {@code insert into n3b_vl_market_type} statement per array entry.
     *
     * <p>The method is best effort: if the page cannot be fetched, has no script element,
     * has fewer {@code var} segments than expected, or the segment contains no
     * {@code [...]} array, an empty list is returned instead of throwing. Malformed JSON
     * inside the brackets is still reported by the JSON library's own exception.
     *
     * @param url address of the page to scrape
     * @return the generated insert statements, in the order they were printed; empty when
     *         nothing could be extracted (never {@code null})
     */
    public static List<Object> readHtml(String url) {
        List<Object> list = new ArrayList<Object>();

        Document doc = null;
        try {
            doc = Jsoup.connect(url).get();
        } catch (Exception e) {
            // Deliberately broad: any fetch failure is reported and treated as "no data".
            System.out.println(e.getMessage() + ":--------------->" + url);
        }
        if (doc == null) {
            return list;
        }

        Elements elScripts = doc.getElementsByTag("script");
        if (elScripts.isEmpty()) {
            return list;
        }

        // NOTE: this splits on the raw text "var", so the segment positions depend on the
        // exact script layout of the page; the indexes were chosen against that layout.
        String[] elScriptList = elScripts.get(0).data().split("var");
        if (elScriptList.length <= MARKET_TYPE_SEGMENT) {
            return list;
        }

        String strTypeList = elScriptList[MARKET_TYPE_SEGMENT];
        System.out.println(strTypeList);
        if (StringUtils.isNotBlank(strTypeList)) {
            int start = strTypeList.indexOf("[");
            int end = strTypeList.lastIndexOf("]");
            if (start < 0 || end < start) {
                // No bracketed array in this segment; nothing to export.
                return list;
            }
            JSONArray arrayList = JSONArray.fromObject(strTypeList.substring(start, end + 1));
            for (Object o : arrayList) {
                String sql = buildMarketTypeInsert(JSONObject.fromObject(o));
                System.out.println(sql);
                list.add(sql);
            }
        }
        return list;
    }

    /** Builds the insert statement for one market-type entry. */
    private static String buildMarketTypeInsert(JSONObject object) {
        StringBuilder sb = new StringBuilder();
        sb.append("insert into n3b_vl_market_type values ");
        sb.append(" ( ");
        // The first column is the code value prefixed with a literal "0".
        sb.append("'0").append(escapeSql(object.get("code_value"))).append("'");
        sb.append(",'").append(escapeSql(object.get("code_name"))).append("'");
        sb.append(",'").append(escapeSql(object.get("code_no"))).append("'");
        sb.append(",'").append(escapeSql(object.get("code_value"))).append("'");
        sb.append(",'").append(escapeSql(object.get("valid_flag"))).append("'");
        sb.append(",'").append(escapeSql(object.get("version"))).append("'");
        sb.append(",'").append(escapeSql(object.get("code_type"))).append("'");
        sb.append(" ); ");
        return sb.toString();
    }

    /**
     * Renders a scraped value for use inside a single-quoted SQL literal by doubling any
     * embedded single quote, so remote page content cannot terminate the literal early.
     */
    private static String escapeSql(Object value) {
        return String.valueOf(value).replace("'", "''");
    }

}
内容来自用户分享和网络整理,不保证内容的准确性;如有侵权内容,可联系管理员处理。点击这里给我发消息
标签: