您的位置:首页 > 其它

代理抓取页面,获得访问地址的最终跳转地址

2012-12-17 16:32 176 查看
<%@page import="java.net.URI"%>
<%@page import="java.io.IOException"%>
<%@page import="org.apache.http.HttpHost"%>
<%@page import="org.apache.http.HttpResponse"%>
<%@page import="org.apache.http.HttpStatus"%>
<%@page import="org.apache.http.client.ClientProtocolException"%>
<%@page import="org.apache.http.client.methods.HttpGet"%>
<%@page import="org.apache.http.client.methods.HttpUriRequest"%>
<%@page import="org.apache.http.impl.client.DefaultHttpClient"%>
<%@page import="org.apache.http.protocol.BasicHttpContext"%>
<%@page import="org.apache.http.protocol.ExecutionContext"%>
<%@page import="org.apache.http.protocol.HttpContext"%>
<%@page import="org.apache.http.client.utils.URLEncodedUtils"%>
<%@page import="java.net.URLEncoder"%>
<%@page import="java.io.UnsupportedEncodingException"%>
<%@page import="org.apache.http.impl.client.DefaultRedirectHandler"%>
<%@page import="org.apache.http.ProtocolException"%>
<%@page import="org.apache.http.Header"%>
<%@page import="java.net.URISyntaxException"%>
<%@ taglib uri="http://java.sun.com/jstl/core" prefix="c" %>
<%@ taglib uri="http://www.duxiu.com/proxy" prefix="proxy" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<%!
/**
 * Redirect handler that special-cases relative "location" headers which
 * contain the "---" marker and a "url=" parameter.
 *
 * For such a header the "location" header on the response is replaced with a
 * prefixed copy, and the returned URI is built from the prefix, the part of
 * the location up to and including the first "url=", and the URL-encoded
 * remainder. Every other response is handled by DefaultRedirectHandler.
 *
 * NOTE(review): the "-----" / "------" prefixes look like placeholders for a
 * real base address (and differ in length) - confirm the intended values.
 */
class CustomRedirectHandler extends DefaultRedirectHandler {

    /**
     * Computes the URI to follow for a redirect response.
     *
     * @param response the response that may request a redirect
     * @param context  the HTTP execution context
     * @return the URI to follow
     * @throws ProtocolException if the rewritten location cannot be encoded or
     *                           is not a valid URI, or if the default handler
     *                           rejects the response
     */
    @Override
    public URI getLocationURI(HttpResponse response, HttpContext context) throws ProtocolException {
        if (isRedirectRequested(response, context)) {
            // A redirect status without a "location" header must not cause an NPE here;
            // the default handler is left to deal with it.
            Header locationHeader = response.getFirstHeader("location");
            String location = (locationHeader == null) ? null : locationHeader.getValue();

            // Use ONE index for both the prefix and the value so that a location
            // containing "url=" more than once is not partially duplicated.
            int urlParam = (location == null) ? -1 : location.indexOf("url=");

            if (location != null
                    && !location.startsWith("http")
                    && location.contains("---")
                    && urlParam >= 0) {
                int valueStart = urlParam + 4; // first character after "url="

                response.removeHeaders("location");
                response.setHeader("location", "-----" + location);

                try {
                    // Explicit charset: the one-argument encode() is deprecated and
                    // depends on the platform default encoding.
                    return new URI("------" + location.substring(0, valueStart)
                            + URLEncoder.encode(location.substring(valueStart), "UTF-8"));
                } catch (UnsupportedEncodingException e) {
                    throw new ProtocolException("UTF-8 encoding is not available", e);
                } catch (URISyntaxException e) {
                    // Report the failure instead of handing a null URI back to the client.
                    throw new ProtocolException("Invalid redirect location: " + location, e);
                }
            }
        }
        return super.getLocationURI(response, context);
    }
}
%>
<%!
/**
 * Requests the given address with an HTTP GET, following redirects through
 * CustomRedirectHandler, and returns the address of the last request executed.
 *
 * A non-200 final status is logged to standard error but does not change the
 * result. If the request fails with an I/O or protocol error, the error is
 * logged and the input address is returned unchanged.
 *
 * @param url the address to request
 * @return the absolute address of the final request, or {@code url} itself
 *         when the request could not be completed
 */
public String test1(String url) {
    DefaultHttpClient httpClient = new DefaultHttpClient();
    httpClient.setRedirectHandler(new CustomRedirectHandler());
    HttpContext context = new BasicHttpContext();
    try {
        HttpResponse response = httpClient.execute(new HttpGet(url), context);
        if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
            // Log the unexpected status; the final address is still reported.
            new IOException(response.getStatusLine().toString()).printStackTrace();
        }

        // After execution the context holds the last request and its target host.
        HttpUriRequest currentReq = (HttpUriRequest) context.getAttribute(ExecutionContext.HTTP_REQUEST);
        HttpHost currentHost = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
        if (currentReq == null) {
            return url;
        }
        URI currentUri = currentReq.getURI();
        if (currentUri.isAbsolute() || currentHost == null) {
            return currentUri.toString();
        }
        // Relative request URI: prepend scheme://host[:port] of the target host.
        return currentHost.toURI() + currentUri;
    } catch (IOException e) {
        // Also covers ClientProtocolException. Previously the error was swallowed
        // and the null response was dereferenced right afterwards.
        e.printStackTrace();
        return url;
    } finally {
        // Release the connections held by this one-shot client.
        httpClient.getConnectionManager().shutdown();
    }
}
%>
<%
// Read the address to resolve from the "dx" request parameter.
String dx = request.getParameter("dx");
if (dx == null || "".equals(dx)) {
    out.println("dx为空!");
    return;
}

// NOTE(review): "dx" is caller-controlled and is both fetched by the server and
// used as a redirect target; consider restricting it to an allow-list of hosts.
String url = dx;

// Escape HTML metacharacters before echoing the parameter back into the page.
String escapedUrl = url.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\"", "&quot;")
        .replace("'", "&#39;");
out.println("url=" + escapedUrl);

// Resolve the address reached after following redirects.
String finalURL = test1(url);
if (!url.equals(finalURL)) {
    // Redirect to the resolved address rather than a literal placeholder string.
    response.sendRedirect(finalURL);
}
%>


说明:继承 DefaultRedirectHandler,重写 getLocationURI 方法,以自定义重定向地址的解析。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: