您的位置:首页 > 编程语言 > Java开发

Java 利用 URL 解析网页内容并模拟手动 form 提交数据

2013-03-19 14:02 656 查看
package com.test;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Small HTTP helper built on {@link HttpURLConnection}: it sends GET/POST form
 * requests, follows 301/302 redirects itself, and lets the caller carry cookies
 * between requests through request headers. {@link #main(String[])} shows a
 * captcha-protected form login built from these pieces.
 *
 * @author: ZhouQiShan
 * @since: 2011-06-09
 */

public class CookieUtil {

    /** Name of the HTTP header that carries the media type and charset. */
    public final static String CONTENT_TYPE = "Content-Type";

    /** Charset used when a response does not declare a usable one. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Media type of the URL-encoded form bodies written by {@link #curl}. */
    private static final String FORM_CONTENT_TYPE = "application/x-www-form-urlencoded";

    // NOTE(review): environment-specific proxy kept from the original code so existing
    // deployments keep working; it is applied only when no proxy is configured yet.
    private static final String DEFAULT_PROXY_HOST = "172.16.55.51";
    private static final String DEFAULT_PROXY_PORT = "808";

    /** Upper bound on redirect hops so a redirect loop cannot recurse forever. */
    private static final int MAX_REDIRECTS = 10;

    /** Connect and read timeout in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;

    /** File name used when a GET response is saved to disk (the captcha image). */
    private static final String CAPTCHA_FILE_NAME = "code.bmp";

    /**
     * Routes subsequent HTTP and HTTPS connections of this JVM through the given proxy.
     * Both the legacy property names and the documented {@code http.*}/{@code https.*}
     * names are set.
     *
     * @param host proxy host name or address; must not be {@code null}
     * @param port proxy port as a string; must not be {@code null}
     * @throws NullPointerException if {@code host} or {@code port} is {@code null}
     */
    public static void setProxy(String host, String port) {
        System.setProperty("proxySet", "true");
        System.setProperty("proxyHost", host);
        System.setProperty("proxyPort", port);
        System.setProperty("http.proxyHost", host);
        System.setProperty("http.proxyPort", port);
        System.setProperty("https.proxyHost", host);
        System.setProperty("https.proxyPort", port);
    }

    /**
     * Performs a GET or POST request and returns the response.
     *
     * <p>For GET the parameters are appended to the query string; for POST they are sent
     * as a URL-encoded form body. 301/302 responses are followed (same method, same
     * parameters) up to a fixed number of hops. Only 200 and 201 responses produce a
     * result.
     *
     * @param method             {@code "GET"} or {@code "POST"}, case-insensitive; anything
     *                           else (including {@code null}) is treated as POST
     * @param sUrl               absolute URL to request
     * @param paramMap           request parameters, may be {@code null} or empty
     * @param requestHeaderMap   extra request headers (for example {@code Cookie}), may be
     *                           {@code null}; entries with a null key or value are skipped
     * @param isOnlyReturnHeader when {@code true} the response body is not read
     * @param path               for GET requests, a directory in which the response body is
     *                           stored as {@code code.bmp} (the returned body is then empty);
     *                           when {@code null} or empty the body is returned instead
     * @param 
     * @return the response, or {@code null} when the request fails or the final status is
     *         neither 200 nor 201
     */
    public static Content curl(String method, String sUrl,
            Map<String, String> paramMap, Map<String, String> requestHeaderMap,
            boolean isOnlyReturnHeader, String path) {
        // Apply the built-in proxy only as a fallback so that a proxy configured by the
        // caller (setProxy or -Dhttp.proxyHost) is no longer overwritten on every call.
        if (System.getProperty("proxyHost") == null
                && System.getProperty("http.proxyHost") == null) {
            setProxy(DEFAULT_PROXY_HOST, DEFAULT_PROXY_PORT);
        }
        return doCurl(method, sUrl, paramMap, requestHeaderMap, isOnlyReturnHeader, path,
                MAX_REDIRECTS);
    }

    /** Executes one request and recurses on redirects while {@code redirectsLeft} allows. */
    private static Content doCurl(String method, String sUrl,
            Map<String, String> paramMap, Map<String, String> requestHeaderMap,
            boolean isOnlyReturnHeader, String path, int redirectsLeft) {
        // Normalise first: HttpURLConnection only accepts upper-case method names, and the
        // POST/GET decisions below must agree with the method that is actually sent.
        String verb = "GET".equalsIgnoreCase(method) ? "GET" : "POST";
        boolean isPost = "POST".equals(verb);
        HttpURLConnection connection = null;
        try {
            URL url = new URL(sUrl);
            URL target = url;
            if (!isPost && paramMap != null && !paramMap.isEmpty()) {
                target = new URL(appendQuery(url, encodeForm(paramMap)));
            }

            connection = (HttpURLConnection) target.openConnection();
            connection.setRequestMethod(verb);
            connection.setRequestProperty("Accept-Language", "zh-cn,zh;q=0.5");
            // Redirects are handled below so the same headers can be re-sent.
            connection.setInstanceFollowRedirects(false);
            connection.setDoInput(true);
            connection.setDoOutput(isPost);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.setUseCaches(false);
            connection.setDefaultUseCaches(false);

            if (isPost) {
                // Set before the caller's headers so an explicit Content-Type still wins.
                connection.setRequestProperty(CONTENT_TYPE, FORM_CONTENT_TYPE);
            }
            if (requestHeaderMap != null) {
                for (Map.Entry<String, String> header : requestHeaderMap.entrySet()) {
                    if (header.getKey() != null && header.getValue() != null) {
                        connection.setRequestProperty(header.getKey(), header.getValue());
                    }
                }
            }

            if (isPost) {
                byte[] body = encodeForm(paramMap).getBytes("UTF-8");
                connection.setFixedLengthStreamingMode(body.length);
                OutputStream postOut = connection.getOutputStream();
                try {
                    postOut.write(body);
                    postOut.flush();
                } finally {
                    postOut.close();
                }
            }

            int status = connection.getResponseCode();

            if (status == HttpURLConnection.HTTP_MOVED_PERM
                    || status == HttpURLConnection.HTTP_MOVED_TEMP) {
                String location = connection.getHeaderField("Location");
                if (location == null) {
                    System.err.println("curl: redirect without Location header from " + sUrl);
                    return null;
                }
                if (redirectsLeft <= 0) {
                    System.err.println("curl: too many redirects, giving up at " + sUrl);
                    return null;
                }
                // Location may be relative; resolve it against the requested URL.
                URL next = new URL(url, location);
                return doCurl(verb, next.toExternalForm(), paramMap, requestHeaderMap,
                        isOnlyReturnHeader, path, redirectsLeft - 1);
            }

            if (status != HttpURLConnection.HTTP_OK
                    && status != HttpURLConnection.HTTP_CREATED) {
                System.err.println("curl: unexpected HTTP status " + status + " for " + sUrl);
                return null;
            }

            byte[] bytes = new byte[0];
            if (!isOnlyReturnHeader) {
                boolean saveToFile = !isPost && path != null && path.length() > 0;
                InputStream in = connection.getInputStream();
                try {
                    if (saveToFile) {
                        // Captcha image location
                        OutputStream fileOut = new FileOutputStream(
                                new File(path, CAPTCHA_FILE_NAME));
                        try {
                            copy(in, fileOut);
                        } finally {
                            fileOut.close();
                        }
                    } else {
                        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                        copy(in, buffer);
                        bytes = buffer.toByteArray();
                    }
                } finally {
                    in.close();
                }
            }

            String encoding = getEncodingFromContentType(
                    connection.getHeaderField(CONTENT_TYPE));
            if (encoding == null) {
                // No Content-Type header at all: fall back instead of failing the request.
                encoding = DEFAULT_ENCODING;
            }
            return new Content(sUrl, new String(bytes, encoding),
                    connection.getHeaderFields());
        } catch (Exception e) {
            // The contract is "null on failure"; report the cause instead of hiding it.
            System.err.println("curl: request to " + sUrl + " failed: " + e);
            return null;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Encodes parameters as {@code k1=v1&k2=v2} with UTF-8 percent-encoding. */
    private static String encodeForm(Map<String, String> params) throws IOException {
        StringBuilder encoded = new StringBuilder();
        if (params == null) {
            return "";
        }
        for (Map.Entry<String, String> entry : params.entrySet()) {
            if (entry.getKey() == null) {
                continue;
            }
            if (encoded.length() > 0) {
                encoded.append('&');
            }
            String value = entry.getValue() == null ? "" : entry.getValue();
            encoded.append(URLEncoder.encode(entry.getKey(), "UTF-8"));
            encoded.append('=');
            encoded.append(URLEncoder.encode(value, "UTF-8"));
        }
        return encoded.toString();
    }

    /** Appends an encoded query to a URL, keeping any {@code #fragment} at the end. */
    private static String appendQuery(URL url, String query) {
        String base = url.toExternalForm();
        String fragment = "";
        int hash = base.indexOf('#');
        if (hash >= 0) {
            fragment = base.substring(hash);
            base = base.substring(0, hash);
        }
        String separator = url.getQuery() == null ? "?" : "&";
        return base + separator + query + fragment;
    }

    /** Copies everything from {@code in} to {@code out}; closes neither stream. */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buffer = new byte[4096];
        int count;
        while ((count = in.read(buffer)) != -1) {
            out.write(buffer, 0, count);
        }
    }

    /**
     * Extracts the charset from a {@code Content-Type} header value.
     *
     * @param contentType header value such as {@code text/html; charset=GBK}, may be
     *                    {@code null}
     * @return {@code null} when {@code contentType} is {@code null}; otherwise the declared
     *         charset if this JVM supports it (the last one wins when several are given),
     *         or {@code "UTF-8"} when none is declared or the declared one is unusable
     */
    public static String getEncodingFromContentType(String contentType) {
        if (contentType == null) {
            return null;
        }
        String encoding = null;
        StringTokenizer tok = new StringTokenizer(contentType, ";");
        if (tok.hasMoreTokens()) {
            tok.nextToken(); // skip the media type itself
            while (tok.hasMoreTokens()) {
                String assignment = tok.nextToken().trim();
                int eqIdx = assignment.indexOf('=');
                if (eqIdx == -1) {
                    continue;
                }
                String varName = assignment.substring(0, eqIdx).trim();
                if (!"charset".equalsIgnoreCase(varName)) {
                    continue;
                }
                String varValue = assignment.substring(eqIdx + 1).trim();
                // The length check keeps a lone quote character from producing an
                // invalid substring range.
                if (varValue.length() >= 2 && varValue.startsWith("\"")
                        && varValue.endsWith("\"")) {
                    varValue = varValue.substring(1, varValue.length() - 1).trim();
                }
                if (isSupportedCharset(varValue)) {
                    encoding = varValue;
                }
            }
        }
        return encoding == null ? DEFAULT_ENCODING : encoding;
    }

    /** Returns whether the JVM supports the charset; malformed names count as unsupported. */
    private static boolean isSupportedCharset(String name) {
        if (name.length() == 0) {
            return false;
        }
        try {
            return Charset.isSupported(name);
        } catch (IllegalArgumentException ignored) {
            // Charset.isSupported rejects syntactically illegal names by throwing.
            return false;
        }
    }

    /**
     * Demo: downloads the captcha image, asks the user to type it in, logs in with the
     * session cookie and stores the member page in {@code D:/sss.txt}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // login
        // Captcha image location
        Content content = curl("GET", "http://www.haoshijia.com.cn/register/index/verify",
                null, null, false, "d:/");
        if (content == null) {
            System.err.println("Could not download the captcha image; aborting.");
            return;
        }

        // Carry the session cookies of the captcha request over to the following requests.
        Map<String, String> resmap = new HashMap<String, String>();
        List<String> setCookies = content.getHeaders().get("Set-Cookie");
        if (setCookies != null) {
            String cookieHeader = buildCookieHeader(setCookies);
            System.out.println(cookieHeader);
            resmap.put("Cookie", cookieHeader);
        }

        String a = "";
        System.out.print("请输入验证码:");
        BufferedReader strin = new BufferedReader(new InputStreamReader(System.in));
        try {
            String line = strin.readLine();
            if (line != null) { // null means end of input
                a = line;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("输入的数是:" + a);

        // NOTE(review): demo credentials are hard-coded; move them out of the source.
        String email = "xsl0218";
        String pass = "zqs021823";
        String loginUrl = "http://www.haoshijia.com.cn/register/index/logincheck";
        String rateReviewUrl = "http://www.haoshijia.com.cn/member/index/index";
        Map<String, String> paramMap = new HashMap<String, String>();
        paramMap.put("login_name", email);
        paramMap.put("login_password", pass);
        paramMap.put("login_code", a);
        content = curl("POST", loginUrl, paramMap, resmap, false, "");
        if (content == null) {
            System.err.println("Login request failed; aborting.");
            return;
        }

        // Fetch the member page with the same cookies.
        paramMap = new HashMap<String, String>();
        content = curl("POST", rateReviewUrl, paramMap, resmap, false, "");
        if (content == null) {
            System.err.println("Member page request failed; aborting.");
            return;
        }
        inFile(content.getBody(), "D:/sss.txt");
        System.out.println(content.getBody());
    }

    /** Joins the name=value part of each Set-Cookie header into one Cookie header value. */
    private static String buildCookieHeader(List<String> setCookies) {
        StringBuilder header = new StringBuilder();
        for (String setCookie : setCookies) {
            int pos = setCookie.indexOf("=");
            if (pos == -1) {
                continue;
            }
            String cookieName = setCookie.substring(0, pos);
            // Everything after the first ';' is attributes (path, expires, ...).
            String cookieVal = setCookie.substring(pos + 1).split(";")[0];
            if (header.length() > 0) {
                header.append(";");
            }
            header.append(cookieName).append("=").append(cookieVal);
        }
        return header.toString();
    }

    /**
     * Writes text to a file, replacing any existing content. The platform default charset
     * is used.
     *
     * @param content text to write
     * @param path    target file
     * @return {@code true} if the text was written; {@code false} if an argument is
     *         {@code null} or an I/O error occurred
     */
    public static boolean inFile(String content, String path) {
        if (content == null || path == null) {
            return false;
        }
        PrintWriter out = null;
        try {
            out = new PrintWriter(new FileWriter(new File(path)));
            out.write(content);
            out.flush();
            // PrintWriter swallows IOExceptions; checkError() is the only way to see them.
            return !out.checkError();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /**
     * Downloads a page with a plain GET and returns it as text, each line terminated by
     * {@code \n}. The HTTP status code is printed to standard output. The charset declared
     * in the response's Content-Type header is used when supported, otherwise UTF-8.
     *
     * @param httpurl absolute HTTP(S) URL
     * @return the page text, or an empty string if the download fails
     */
    public static String getHtmlReadLine(String httpurl) {
        HttpURLConnection connection = null;
        BufferedReader reader = null;
        try {
            connection = (HttpURLConnection) new URL(httpurl).openConnection();
            connection.connect();
            System.out.println(connection.getResponseCode());

            String encoding = getEncodingFromContentType(connection.getContentType());
            if (encoding == null) {
                encoding = "utf-8";
            }
            reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), encoding));

            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line).append("\n");
            }
            return page.toString();
        } catch (Exception e) {
            System.err.println("getHtmlReadLine: could not read " + httpurl + ": " + e);
            return "";
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing fails
                }
            }
            if (connection != null) {
                connection.disconnect();
            }
        }
    }
}

/**
 * Immutable holder for the result of one HTTP request: the requested URL, the decoded
 * response body and the response headers.
 */
class Content {
    private final String url;
    private final String body;
    private final Map<String, List<String>> headers;

    /**
     * @param url     URL the response was fetched from
     * @param body    decoded response body
     * @param headers response headers keyed by header name; {@code null} is treated as
     *                "no headers" so {@link #getHeaders()} never returns {@code null}
     */
    public Content(String url, String body, Map<String, List<String>> headers) {
        this.url = url;
        this.body = body;
        this.headers = headers != null ? headers : new HashMap<String, List<String>>();
    }

    /** @return the URL the response was fetched from */
    public String getUrl() {
        return url;
    }

    /** @return the decoded response body */
    public String getBody() {
        return body;
    }

    /** @return the response headers keyed by header name; never {@code null} */
    public Map<String, List<String>> getHeaders() {
        return headers;
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: