您的位置:首页 > 理论基础 > 计算机网络

HttpClient和HtmlParser配合实现自动CAS单点登录系统抽取页面信息

2015-11-27 17:40 1181 查看
HttpClient下载地址:http://mirror.bit.edu.cn/apache/httpcomponents/httpclient/binary/httpcomponents-client-4.5.1-bin.zip

在项目中引入其中所有的jar包,并另外引入HtmlParser的jar包(代码中用到了org.htmlparser),然后看下面的代码

package org.apache.http.examples.client;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStreamReader;

import java.net.URI;

import java.util.List;

import org.apache.http.HttpEntity;

import org.apache.http.HttpResponse;

import org.apache.http.client.HttpClient;

import org.apache.http.client.methods.CloseableHttpResponse;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.client.methods.HttpUriRequest;

import org.apache.http.client.methods.RequestBuilder;

import org.apache.http.cookie.Cookie;

import org.apache.http.impl.client.BasicCookieStore;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import org.apache.http.util.EntityUtils;

import org.htmlparser.Node;

import org.htmlparser.NodeFilter;

import org.htmlparser.Parser;

import org.htmlparser.filters.TagNameFilter;

import org.htmlparser.tags.TableColumn;

import org.htmlparser.tags.TableRow;

import org.htmlparser.tags.TableTag;

import org.htmlparser.util.NodeList;

import org.htmlparser.util.ParserException;

import org.htmlparser.util.SimpleNodeIterator;

/**
 * An example that demonstrates how the HttpClient API can be used to perform a
 * form-based logon against a CAS login page and then, reusing the cookies
 * collected in a shared cookie store, request another page and extract one
 * table cell from it with HtmlParser.
 */
public class ClientFormLogin {

    private static final String CAS_URI = "http://192.168.1.121:8088/cas/login";

    // NOTE(review): demo credentials are hard-coded; move them to configuration for real use.
    private static final String USERNAME = "admin";

    private static final String PASSWORD = "123456";

    private static final String REQ_URI = "http://192.168.1.121:8089/szxy/oa/oasfwj/list";

    /** Text that immediately precedes the value of the hidden {@code lt} input in the login form. */
    private static final String LT_INPUT_PREFIX = "<input type=\"hidden\" name=\"lt\" value=\"";

    /**
     * Runs the demo: fetches the login form, posts the credentials, then
     * requests {@link #REQ_URI} and hands the response body to
     * {@link #parseHtml(String)}.
     *
     * @param args ignored
     * @throws Exception if any request or the HTML parsing fails
     */
    public static void main(String[] args) throws Exception {
        BasicCookieStore cookieStore = new BasicCookieStore();
        CloseableHttpClient httpclient = HttpClients.custom()
                .setDefaultCookieStore(cookieStore).build();
        try {
            // Step 1: fetch the login form so the server can set its initial cookies.
            CloseableHttpResponse response1 = httpclient.execute(new HttpGet(CAS_URI));
            try {
                System.out.println("Login form get: " + response1.getStatusLine());
                EntityUtils.consume(response1.getEntity());
                printCookies("Initial set of cookies:", cookieStore.getCookies());
            } finally {
                response1.close();
            }

            // Step 2: submit the login form. doCasLoginRequest issues a second GET of the
            // login page to read the hidden "lt" value.
            // NOTE(review): "e2s1" is hard-coded; presumably it is the flow execution key the
            // server hands out for that second login-page request. This is fragile and would
            // be better parsed from the form the same way "lt" is. TODO confirm.
            HttpUriRequest login = RequestBuilder.post()
                    .setUri(new URI(CAS_URI))
                    .addParameter("username", USERNAME)
                    .addParameter("password", PASSWORD)
                    .addParameter("lt", doCasLoginRequest(httpclient, CAS_URI))
                    .addParameter("_eventId", "submit")
                    .addParameter("submit", "登录")
                    .addParameter("execution", "e2s1").build();

            // Consume and close the login response before issuing the next request so its
            // connection is released even if the following request fails.
            CloseableHttpResponse response2 = httpclient.execute(login);
            try {
                System.out.println("Login form get: " + response2.getStatusLine());
                EntityUtils.consume(response2.getEntity());
                printCookies("Post logon cookies:", cookieStore.getCookies());
            } finally {
                response2.close();
            }

            // Step 3: request the target page with the cookies gathered so far.
            CloseableHttpResponse response3 = httpclient.execute(new HttpGet(REQ_URI));
            try {
                System.out.println("请求访问地址状态码: " + response3.getStatusLine());
                HttpEntity pageEntity = response3.getEntity();
                // EntityUtils.toString rejects a null entity. UTF-8 is only the fallback used
                // when the response declares no charset, matching the charset given to the parser.
                String body = (pageEntity == null) ? "" : EntityUtils.toString(pageEntity, "UTF-8");
                parseHtml(body);
            } finally {
                response3.close();
            }
        } finally {
            httpclient.close();
        }
    }

    /**
     * Prints a heading followed by one line per cookie, or "None" when the list is empty.
     *
     * @param heading line printed before the cookies
     * @param cookies cookies to list
     */
    private static void printCookies(String heading, List<Cookie> cookies) {
        System.out.println(heading);
        if (cookies.isEmpty()) {
            System.out.println("None");
            return;
        }
        for (Cookie cookie : cookies) {
            System.out.println("- " + cookie.toString());
        }
    }

    /**
     * Parses an HTML document and prints the text of the first child node of the
     * second column in the second row of the second {@code <table>} it contains.
     *
     * @param body HTML text to parse
     * @return the extracted text, or an empty string when the expected table,
     *         row, column or child node is not present
     * @throws ParserException if HtmlParser fails to parse {@code body}
     */
    private static String parseHtml(String body) throws ParserException {
        Parser parser = Parser.createParser(body, "UTF-8");
        NodeFilter filter = new TagNameFilter("table");
        NodeList tables = parser.extractAllNodesThatMatch(filter);

        // Locate the table that holds the unit list (the second table on the page).
        if (tables.size() < 2) {
            System.err.println("Expected at least 2 tables in the page but found " + tables.size());
            return "";
        }
        TableTag tabletag = (TableTag) tables.elementAt(1);

        TableRow row = tabletag.getRow(1);
        if (row == null) {
            System.err.println("The unit table has no second row");
            return "";
        }

        TableColumn[] cols = row.getColumns();
        if (cols == null || cols.length < 2) {
            System.err.println("The unit row has fewer than 2 columns");
            return "";
        }

        NodeList cellChildren = cols[1].getChildren();
        if (cellChildren == null || cellChildren.size() == 0) {
            System.err.println("The unit name cell is empty");
            return "";
        }

        String unitName = cellChildren.elementAt(0).getText();
        System.out.println("单位名称:" + unitName);
        return unitName;
    }

    /**
     * Walks a node list recursively and prints the plain text of every node
     * without children whose text contains the keyword.
     *
     * @param list    nodes to walk
     * @param keyword text to look for
     */
    private static void processNodeList(NodeList list, String keyword) {
        SimpleNodeIterator iterator = list.elements();
        while (iterator.hasMoreNodes()) {
            Node node = iterator.nextNode();
            NodeList childList = node.getChildren();
            if (childList == null) {
                // No child list: treat the node as a value node and test its text.
                String text = node.toPlainTextString();
                if (text.contains(keyword)) {
                    System.out.println(text);
                }
            } else {
                // The node has children: descend into them.
                processNodeList(childList, keyword);
            }
        }
    }

    /**
     * Fetches the given page and extracts the value of its hidden {@code lt}
     * input by scanning the response line by line for {@link #LT_INPUT_PREFIX}.
     *
     * @param httpclient client used to issue the GET request
     * @param url        address of the login page
     * @return the {@code lt} value, or an empty string when the response has no
     *         body or no matching input; if several lines match, the last one wins
     * @throws IOException if the request fails or the body cannot be read
     */
    private static String doCasLoginRequest(HttpClient httpclient, String url)
            throws IOException {
        HttpResponse response = httpclient.execute(new HttpGet(url));
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            // No body to scan; checked before touching the entity's content stream.
            return "";
        }

        String result = "";
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                entity.getContent(), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                int start = line.indexOf(LT_INPUT_PREFIX);
                if (start == -1) {
                    continue;
                }
                String tail = line.substring(start + LT_INPUT_PREFIX.length());
                int end = tail.indexOf("\"");
                if (end != -1) {
                    result = tail.substring(0, end);
                }
            }
        } finally {
            // Closing the reader closes the entity's content stream.
            reader.close();
        }
        return result;
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: