您的位置:首页 > 编程语言 > Java开发

Java实现有规律的字符串转JSON格式

2016-09-23 00:00 423 查看
package net.sc.common.util;

import java.io.File;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

/**
 * Helper methods built on jsoup for locating elements by attribute selectors
 * and for turning simple HTML structures (label/value pairs, tables) into maps.
 *
 * <p>Supports selection on multiple attributes.
 *
 * @author Aaron
 * @createTime 2015-08-07
 */
public class JsoupUtil {

    /** Matches one {@code [name=value]} attribute predicate inside a selector. */
    public static final String _REGEX = "\\[(?<name>\\w+)=(?<value>\\w+)\\]";

    /** Compiled once; {@link Pattern} instances are immutable and safe to share. */
    private static final Pattern ATTRIBUTE_PATTERN = Pattern.compile(_REGEX);

    /** Full-width colon (U+FF1A), commonly used as a label terminator in CJK pages. */
    private static final String FULL_WIDTH_COLON = "\uFF1A";

    /** Pattern used to extract attribute predicates from selectors. */
    Pattern p;

    public JsoupUtil() {
        p = ATTRIBUTE_PATTERN;
    }

    /**
     * Applies the given selectors one after another, each one to the results of
     * the previous one, starting from {@code queryElement}.
     *
     * @param queryElement root element to search in; may be {@code null}
     * @param attrs        jsoup CSS selectors, applied in order
     * @return the elements matched by the last selector; empty when nothing matches
     */
    public Elements findElement(Element queryElement, String... attrs) {
        return this.findElement(queryElement, false, attrs);
    }

    /**
     * Applies the given selectors one after another and optionally keeps only
     * the elements whose attribute set is exactly the set named in the selectors.
     *
     * <p>TODO: {@code only} handles the equality form {@code [name=value]} only;
     * prefix/suffix forms ({@code ^=}, {@code $=}) are not recognised.
     *
     * @param queryElement root element to search in; may be {@code null}
     * @param only         when {@code true}, drop matched elements that carry any
     *                     attribute not named in {@code attrs}, or fewer attributes
     *                     than named (only attribute names are compared here; the
     *                     values were already checked by the selector itself)
     * @param attrs        jsoup CSS selectors, applied in order
     * @return the matching elements; empty when the input is {@code null}/empty
     *         or nothing matches
     */
    public Elements findElement(Element queryElement, boolean only, String... attrs) {
        Elements empty = new Elements();
        if (queryElement == null || attrs == null || attrs.length == 0) {
            return empty;
        }

        // Narrow the candidate set selector by selector.
        Elements current = new Elements(queryElement);
        for (int i = 0; i < attrs.length; i++) {
            Elements next = new Elements();
            for (Element candidate : current) {
                next.addAll(candidate.select(attrs[i]));
            }
            if (next.isEmpty() && i < attrs.length - 1) {
                return empty;
            }
            current = next;
        }
        if (!only) {
            return current;
        }

        // Collect every [name=value] predicate of every selector. Reading only the
        // first predicate would make multi-attribute selectors impossible to satisfy.
        Map<String, String> searchAttrs = new LinkedHashMap<String, String>();
        for (String selector : attrs) {
            Matcher m = p.matcher(selector);
            boolean found = false;
            while (m.find()) {
                searchAttrs.put(m.group("name"), m.group("value"));
                found = true;
            }
            // A selector without any attribute predicate: return the unfiltered result.
            if (!found) {
                return current;
            }
        }

        Elements exact = new Elements();
        for (Element element : current) {
            Attributes attributes = element.attributes();
            if (attributes.size() != searchAttrs.size()) {
                continue;
            }
            boolean hasUnknownAttribute = false;
            for (Attribute attribute : attributes) {
                if (!searchAttrs.containsKey(attribute.getKey())) {
                    hasUnknownAttribute = true;
                    break;
                }
            }
            if (!hasUnknownAttribute) {
                exact.add(element);
            }
        }
        return exact;
    }

    /**
     * Reads name/value pairs from elements that have exactly two children:
     * the first child's text is the name, the second child's text is the value.
     * A trailing ASCII or full-width colon is stripped from the name.
     *
     * @param eles elements to inspect; may be {@code null}
     * @return pairs in encounter order; elements without exactly two children are skipped
     */
    public Map<String, String> getPropertyEle(Elements eles) {
        Map<String, String> map = new LinkedHashMap<>();
        if (eles == null) {
            return map;
        }
        for (Element ele : eles) {
            Elements children = ele.children();
            if (children.size() != 2) {
                continue;
            }
            String name = stripTrailingColon(children.get(0).text());
            String value = children.get(1).text();
            map.put(name, value.trim());
        }
        return map;
    }

    /**
     * Reads colon-separated name/value pairs from the string form of each node.
     * The text is split at the first ASCII or full-width colon. Entries whose
     * name is empty or contains {@code onclick=} are skipped.
     *
     * @param nodeList nodes to inspect; may be {@code null}
     * @return pairs in encounter order
     */
    public Map<String, String> getPropertyColon(List<Node> nodeList) {
        Map<String, String> map = new LinkedHashMap<>();
        if (nodeList == null) {
            return map;
        }
        for (Node node : nodeList) {
            String text = node.toString();
            int separator = indexOfColon(text);
            if (separator < 0) {
                continue;
            }
            String name = text.substring(0, separator);
            if (StringUtil.isEmpty(name)) {
                continue;
            }
            // The string form is markup, so a colon inside an inline handler
            // attribute is not a label separator.
            if (name.indexOf("onclick=") != -1) {
                continue;
            }
            map.put(name.trim(), text.substring(separator + 1).trim());
        }
        return map;
    }

    /**
     * Reads colon-separated name/value pairs from the text of each element.
     * The text is split at the first ASCII or full-width colon. Entries whose
     * name is empty are skipped.
     *
     * @param eles elements to inspect; may be {@code null}
     * @return pairs in encounter order
     */
    public Map<String, String> getPropertyColon(Elements eles) {
        Map<String, String> map = new LinkedHashMap<>();
        if (eles == null) {
            return map;
        }
        for (Element ele : eles) {
            String text = ele.text();
            int separator = indexOfColon(text);
            if (separator < 0) {
                continue;
            }
            String name = text.substring(0, separator);
            if (StringUtil.isEmpty(name)) {
                continue;
            }
            map.put(name.trim(), text.substring(separator + 1).trim());
        }
        return map;
    }

    /**
     * Reads a table laid out as alternating label/value cells, using every row.
     *
     * @param table table element; may be {@code null}
     * @return label/value pairs in encounter order
     */
    public Map<String, String> getTableColumnData(Element table) {
        return this.getTableColumnData(table, "");
    }

    /**
     * Reads a table laid out as alternating label/value cells: in each row the
     * 1st, 3rd, ... cells are labels and the 2nd, 4th, ... cells are their values.
     *
     * @param table          table element; may be {@code null}
     * @param rowSelectRange comma-separated 1-based positions of rows to drop before
     *                       reading; negative positions count from the end
     *                       ({@code -1} is the last row); empty means keep all rows
     * @return label/value pairs in encounter order
     * @throws NumberFormatException if {@code rowSelectRange} contains a non-numeric token
     */
    public Map<String, String> getTableColumnData(Element table, String rowSelectRange) {
        Map<String, String> map = new LinkedHashMap<>();
        if (table == null) {
            return map;
        }
        Elements trs = table.select("tr");
        removeByRange(trs, rowSelectRange);

        for (Element tr : trs) {
            Elements cells = tr.select("td");
            // When th and td are mixed in one row, use the row's direct children instead.
            if (tr.select("th").size() > 0) {
                cells = tr.children();
            }
            int position = 0;
            String name = "";
            for (Element cell : cells) {
                position++;
                boolean isValueCell = position % 2 == 0;
                if (isValueCell) {
                    if (StringUtil.isEmpty(name)) {
                        continue;
                    }
                    map.put(name, cell.text().trim());
                } else {
                    name = stripTrailingColon(cell.text());
                }
            }
        }
        return map;
    }

    /**
     * Reads a table whose first row holds the column names, using every row and column.
     *
     * @param table table element; may be {@code null}
     * @return one map per data row, keyed by column name
     */
    public List<Map<String, String>> getTableRowData(Element table) {
        return getTableRowData(table, null, "", "");
    }

    /**
     * Reads a table whose first remaining row holds the column names.
     *
     * @param table             table element; may be {@code null}
     * @param rowSelectRange    rows to drop, see {@link #getTableColumnData(Element, String)}
     * @param columnSelectRange columns to drop from every row, same syntax
     * @return one map per data row, keyed by column name
     * @throws NumberFormatException if a range contains a non-numeric token
     */
    public List<Map<String, String>> getTableRowData(Element table, String rowSelectRange, String columnSelectRange) {
        return getTableRowData(table, null, rowSelectRange, columnSelectRange);
    }

    /**
     * Reads a table row by row into maps keyed by column name.
     *
     * <p>When {@code selfNameList} is {@code null} or empty, the first remaining
     * row supplies the column names and is not returned as data. Otherwise
     * {@code selfNameList} supplies the names and every remaining row is data.
     * Cells beyond the number of known column names are ignored.
     *
     * @param table             table element; may be {@code null}
     * @param selfNameList      caller-supplied column names; may be {@code null} or empty
     * @param rowSelectRange    comma-separated 1-based positions of the rows to remove;
     *                          negative positions count from the end
     * @param columnSelectRange same syntax, applied to the cells of every row
     * @return one map per non-empty data row, in document order
     * @throws NumberFormatException if a range contains a non-numeric token
     */
    public List<Map<String, String>> getTableRowData(Element table, List<String> selfNameList, String rowSelectRange, String columnSelectRange) {
        List<Map<String, String>> valueList = new ArrayList<>();
        if (table == null) {
            return valueList;
        }
        Elements rows = table.select("tr");
        removeByRange(rows, rowSelectRange);

        boolean namesSupplied = selfNameList != null && selfNameList.size() > 0;
        List<String> nameList = namesSupplied ? selfNameList : new ArrayList<String>();
        boolean firstRow = true;

        for (Element row : rows) {
            Elements cells = row.select("td");
            if (cells.isEmpty()) {
                cells = row.select("th");
            }
            removeByRange(cells, columnSelectRange);

            if (firstRow && !namesSupplied) {
                // Header row: collect the column names, produce no data.
                for (Element cell : cells) {
                    nameList.add(cell.text().trim());
                }
                firstRow = false;
                continue;
            }
            firstRow = false;

            Map<String, String> rowValues = new LinkedHashMap<>();
            int column = 0;
            for (Element cell : cells) {
                // A ragged row (more cells than names) must not abort the whole table.
                if (column >= nameList.size()) {
                    break;
                }
                rowValues.put(nameList.get(column), cell.text());
                column++;
            }
            if (rowValues.size() > 0) {
                valueList.add(rowValues);
            }
        }
        return valueList;
    }

    /**
     * Removes the entries named by {@code range} from {@code items}.
     *
     * <p>Every position is resolved against the size of the list before any
     * removal, so the order of the tokens does not matter and duplicates are
     * harmless. Positions outside the list are ignored.
     *
     * <p>NOTE(review): newer jsoup releases may also detach elements removed from
     * an {@code Elements} list from the document — confirm against the jsoup
     * version in use.
     *
     * @param items list to modify in place
     * @param range comma-separated 1-based positions; negative values count from
     *              the end; empty or {@code null} removes nothing
     */
    private static void removeByRange(Elements items, String range) {
        if (StringUtil.isEmpty(range)) {
            return;
        }
        int size = items.size();
        boolean[] drop = new boolean[size];
        for (String token : range.split(",")) {
            int position = Integer.parseInt(token.trim());
            int target = position < 0 ? size + position : position - 1;
            if (target >= 0 && target < size) {
                drop[target] = true;
            }
        }
        // Remove from the back so earlier indices stay valid.
        for (int i = size - 1; i >= 0; i--) {
            if (drop[i]) {
                items.remove(i);
            }
        }
    }

    /** Returns the index of the first ASCII or full-width colon in {@code text}, or -1. */
    private static int indexOfColon(String text) {
        int ascii = text.indexOf(":");
        int wide = text.indexOf(FULL_WIDTH_COLON);
        if (ascii < 0) {
            return wide;
        }
        if (wide < 0) {
            return ascii;
        }
        return Math.min(ascii, wide);
    }

    /** Trims {@code label} and drops one trailing ASCII or full-width colon, if present. */
    private static String stripTrailingColon(String label) {
        String trimmed = label.trim();
        if (trimmed.endsWith(":") || trimmed.endsWith(FULL_WIDTH_COLON)) {
            trimmed = trimmed.substring(0, trimmed.length() - 1);
        }
        return trimmed.trim();
    }

    /**
     * Manual smoke-test entry point; intentionally does nothing by default.
     *
     * <p>Usage examples (expects a JsoupUtil.html fixture next to the class):
     * <pre>
     * JsoupUtil ju = new JsoupUtil();
     * String path = ju.getClass().getResource("").getPath() + "JsoupUtil.html";
     * Document doc = Jsoup.parse(new File(path), "UTF-8");
     *
     * Elements results = ju.findElement(doc, true, "tr[width=100]");
     * for (Element result : results) {
     *     System.out.println(result.text());
     * }
     *
     * List&lt;Map&lt;String, String&gt;&gt; list = ju.getTableRowData(doc.select("table[class=tb6]").get(0));
     * System.out.println(list);
     * </pre>
     */
    public static void main(String[] args) throws Exception {
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  java JSON jsoup