您的位置:首页 > 编程语言 > Java开发

【Java爬虫】爬取南通大学教务系统成绩计算绩点

2015-11-07 16:16 429 查看
  以前写过一个Python版的,但是想做一个JSP网页版的,就又用Java重写了一下。

  具体地址的分析过程在这里,这里简单说一下HttpClient的Get、Post方法的使用。

           1.Get请求方法

//创建一个浏览器客户端
CloseableHttpClient httpClient = HttpClients.createDefault();
//要Get的地址
String url1="http://www.baidu.com";
//创建一个Get请求
HttpGet baidu=new HttpGet(url1);
//用上面创建的浏览器客户端执行该请求
CloseableHttpResponse res=httpClient.execute(baidu);
//用响应创建一个http实体并获得输入流
HttpEntity he=res.getEntity();
InputStream in=he.getContent();
//将获得的流写到本地磁盘
FileOutputStream out=new FileOutputStream("baidu.html'");
byte[] buffer=new byte[1024];
int count=-1;
while((count=in.read(buffer))!=-1)
{
out.write(buffer, 0, count);
}
in.close();
out.close();


   2.Post请求方法
CloseableHttpClient httpClient = HttpClients.createDefault();
String url="http://××××.××××.com?#";
//要提交的参数username,password
List<NameValuePair> list = new ArrayList<NameValuePair>();
list.add(new BasicNameValuePair("Username","Name"));
list.add(new BasicNameValuePair("Password","××××××"));
//转换编码
UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8");
//创建Post请求
HttpPost httpPost=new HttpPost(url);
//为请求设置参数
httpPost.setEntity(entity);
//获得响应,输入流并写入本地磁盘
CloseableHttpResponse res=httpClient.execute(httpPost);
HttpEntity he=res.getEntity();
InputStream in=he.getContent();
FileOutputStream out=new FileOutputStream("××××.×××");
byte[] buffer=new byte[1024];
int count=-1;
while((count=in.read(buffer))!=-1)
{
out.write(buffer, 0, count);
}
in.close();
out.close();

爬虫的完整代码:
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.*;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;

/**
 * Console front end for the grade crawler.
 *
 * <p>After fetching the captcha image it reads one line (the captcha text) and then
 * command lines from standard input: {@code n} prints the name fields found on the
 * personal-information page, {@code s} prints each counted course and the
 * credit/grade-point totals, and {@code #} ends the loop. Every command line is
 * echoed back before it is handled.
 */
public class spider02 {
    /**
     * Runs the interactive loop until {@code #} is entered or standard input ends.
     *
     * @param args unused
     * @throws ClientProtocolException if a request fails at the HTTP protocol level
     * @throws IOException if a request or a local file write fails
     */
    public static void main(String[] args) throws ClientProtocolException, IOException
    {
        @SuppressWarnings("resource") // the Scanner wraps System.in and is deliberately left open
        Scanner cin=new Scanner(System.in);
        doon asd=new doon();
        asd.getyzm();
        // The captcha line is consumed for testing purposes only: this method never
        // passes it on to doon.login()/doon.done().
        if(!cin.hasNextLine())
        {
            return;
        }
        cin.nextLine();
        String stop="1";
        // hasNextLine() ends the loop cleanly at end of input instead of letting
        // nextLine() throw NoSuchElementException.
        while(!stop.equals("#")&&cin.hasNextLine())
        {
            stop=cin.nextLine();
            System.out.println(stop);
            if(stop.equals("n"))
            {
                printNames(asd);
            }
            else if(stop.equals("s"))
            {
                printScores(asd);
            }
        }
    }

    /** Prints every capture matched on the personal-information page, one per line. */
    private static void printNames(doon asd)
    {
        Matcher name=asd.patternname(asd.getname());
        while(name.find())
        {
            System.out.println(name.group(1));
        }
    }

    /** Prints one line per counted course, then the credit, grade-point and average totals. */
    private static void printScores(doon asd) throws IOException
    {
        Matcher score=asd.patternscore(asd.getscore());
        List<lession> les=asd.workjidian(score);
        double jdsum=0,xfsum=0;
        for(lession course:les)
        {
            jdsum+=course.getKcxfjd();
            xfsum+=Double.valueOf(course.getXf()).doubleValue();
            System.out.println(course.getKcmc()+"\t"+course.getZpcj()+"\t"+course.getXf()+"\t"+course.getKcxfjd());
        }
        // With no counted credits the quotient would be 0/0 = NaN; report 0.0 instead.
        double average=xfsum>0?jdsum/xfsum:0.0;
        System.out.println("所修课程学分:"+xfsum);
        System.out.println("所修课程学分绩点:"+jdsum);
        System.out.println("平均学分绩点:"+average);
    }
}
/**
 * Talks to the grade-query pages under {@code http://jwgl.ntu.edu.cn/cjcx/}: downloads the
 * captcha image, submits the login form, fetches the personal-information and score
 * pages, and turns the score records into per-course grade points.
 *
 * <p>All requests go through the single {@link #httpClient} instance.
 * NOTE(review): presumably this is what lets the session established by the captcha and
 * login requests carry over to the later queries - confirm.
 */
class doon{
    // Compiled once instead of on every call; a Pattern can be shared safely.
    private static final Pattern SCORE_PATTERN=Pattern.compile("\"kcmc\":\"(.*?)\",\"jsxm\":\"(.*?)\",\"xq\":\"(.*?)\",\"xs\":\"(.*?)\",\"xf\":\"(.*?)\",\"zpcj\":\"(.*?)\",\"pscj\":\"(.*?)\",\"qmcj\":\"(.*?)\",\"kcsx\":\"(.*?)\",\"cjid\":\"(.*?)\",\"ksfsm\":\"(.*?)\",\"pxcj\":\"(.*?)\"}");
    private static final Pattern NAME_PATTERN=Pattern.compile("<b>(.*?)</b>");

    private CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Logs in and then requests the score data. The text returned by
     * {@link #getscore()} is discarded here; I/O failures are printed to standard error
     * rather than propagated.
     *
     * @param xh   value sent as the {@code xh} form field
     * @param sfzh value sent as the {@code sfzh} form field
     * @param kl   value sent as the {@code kl} form field
     * @param yzm  value sent as the {@code yzm} form field (the captcha text)
     */
    public void done(String xh,String sfzh,String kl,String yzm)
    {
        try {
            login(xh, sfzh, kl, yzm); // try to log in
            getscore(); // fetch the scores
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so one handler covers both.
            e.printStackTrace();
        }
    }

    /**
     * Posts {@code xq=2013-2014-1} to the personal-information page and returns the
     * response body as text.
     *
     * @return the body decoded with the platform default charset; if an I/O error
     *         occurs, whatever had been received up to that point (possibly empty)
     */
    public String getname()
    {
        String url="http://jwgl.ntu.edu.cn/cjcx/QueryAll.aspx"; // personal-information page
        List<NameValuePair> list=new ArrayList<NameValuePair>();
        list.add(new BasicNameValuePair("xq","2013-2014-1"));
        ByteArrayOutputStream body=new ByteArrayOutputStream();
        try {
            HttpPost post=new HttpPost(url);
            post.setEntity(new UrlEncodedFormEntity(list,"utf-8"));
            transfer(post, null, body);
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Decode once over the complete byte sequence: decoding each 1024-byte chunk on
        // its own corrupts any multi-byte character that straddles a chunk boundary.
        // NOTE(review): still the platform default charset, as before - confirm the
        // server's charset and pass it explicitly.
        return body.toString();
    }

    /**
     * Downloads the captcha image with a GET request and writes it to {@code yzm.gif}
     * in the working directory.
     *
     * @throws IOException if the request or the file write fails
     */
    public void getyzm() throws IOException
    {
        String url1="http://jwgl.ntu.edu.cn/cjcx/checkImage.aspx"; // captcha page
        transfer(new HttpGet(url1), "yzm.gif", null);
    }

    /**
     * Posts the login form and writes the response body to {@code ans.html}.
     *
     * @param xh   value sent as the {@code xh} form field
     * @param sfzh value sent as the {@code sfzh} form field
     * @param kl   value sent as the {@code kl} form field
     * @param yzm  value sent as the {@code yzm} form field (the captcha text)
     * @throws ClientProtocolException if the request fails at the HTTP protocol level
     * @throws IOException if the request or the file write fails
     */
    public void login(String xh,String sfzh,String kl,String yzm) throws ClientProtocolException, IOException
    {
        String url="http://jwgl.ntu.edu.cn/cjcx/Default.aspx"; // login page
        List<NameValuePair> list = new ArrayList<NameValuePair>();
        // NOTE(review): these two hidden-field values are hard-coded; presumably they were
        // copied from one load of the login page - confirm they stay valid.
        list.add(new BasicNameValuePair("__VIEWSTATE","/wEPDwUJODExMDE5NzY5ZGRgtUdRucUbXsT8g55XmVsTwV6PMw=="));
        list.add(new BasicNameValuePair("__VIEWSTATEGENERATOR","6C0FF253"));
        list.add(new BasicNameValuePair("xh",xh));
        list.add(new BasicNameValuePair("sfzh",sfzh));
        list.add(new BasicNameValuePair("kl",kl));
        list.add(new BasicNameValuePair("yzm",yzm));
        HttpPost httpPost=new HttpPost(url);
        httpPost.setEntity(new UrlEncodedFormEntity(list,"utf-8"));
        transfer(httpPost, "ans.html", null);
    }

    /**
     * Posts {@code start=0&pageSize=80} to the score-data page, writes the response body
     * to {@code score.html} and also returns it as text.
     *
     * @return the response body decoded with the platform default charset
     * @throws ClientProtocolException if the request fails at the HTTP protocol level
     * @throws IOException if the request or the file write fails
     */
    public String getscore() throws ClientProtocolException, IOException
    {
        String url="http://jwgl.ntu.edu.cn/cjcx/Data/ScoreAllData.aspx"; // score data
        List<NameValuePair> list = new ArrayList<NameValuePair>();
        list.add(new BasicNameValuePair("start","0"));
        list.add(new BasicNameValuePair("pageSize","80"));
        HttpPost httpPost=new HttpPost(url);
        httpPost.setEntity(new UrlEncodedFormEntity(list,"utf-8"));
        ByteArrayOutputStream body=new ByteArrayOutputStream();
        transfer(httpPost, "score.html", body);
        // Decoded once over all bytes so a multi-byte character split across two read
        // chunks is not corrupted. NOTE(review): platform default charset, as before.
        return body.toString();
    }

    /**
     * Returns a matcher for score records in {@code score}. Groups 1 to 12 capture, in
     * order, the values of the keys kcmc, jsxm, xq, xs, xf, zpcj, pscj, qmcj, kcsx, cjid,
     * ksfsm and pxcj.
     *
     * @param score text to scan
     * @return a fresh matcher positioned before the first record
     */
    public Matcher patternscore(String score)
    {
        return SCORE_PATTERN.matcher(score);
    }

    /**
     * Returns a matcher whose group 1 captures the text between each
     * {@code <b>} and {@code </b>} pair in {@code name}.
     *
     * @param name text to scan
     * @return a fresh matcher positioned before the first match
     */
    public Matcher patternname(String name)
    {
        return NAME_PATTERN.matcher(name);
    }

    /**
     * Consumes every remaining match of {@code score} and builds one {@code lession} per
     * counted course; the last constructor argument is the course's grade point
     * multiplied by its credits (group 5).
     *
     * <p>Records graded 缓考 or 不及格, or with a numeric grade below 60, are left out. So
     * are records whose grade or credit text cannot be interpreted, which previously
     * aborted the whole run with a {@code NumberFormatException}.
     *
     * @param score matcher produced by {@link #patternscore(String)}
     * @return the counted courses in match order
     */
    public List<lession> workjidian(Matcher score)
    {
        List<lession> les=new ArrayList<lession>();
        while(score.find())
        {
            double point=gradePoint(score.group(6));
            if(Double.isNaN(point))
            {
                continue;
            }
            double credits;
            try {
                credits=Double.parseDouble(score.group(5));
            } catch (NumberFormatException e) {
                continue; // credits are not a number, so the record cannot be weighted
            }
            double xf=point*credits;
            les.add(new lession(score.group(1),score.group(2),score.group(3),score.group(4),score.group(5),score.group(6),score.group(7),score.group(8),score.group(9),score.group(10),score.group(11),score.group(12),xf));
        }
        return les;
    }

    /**
     * Maps a grade to its grade point: the five-level grades 优/良/中/及格 map to
     * 4.5/3.5/2.5/1.5, and a numeric mark of at least 60 maps to
     * {@code (mark - bandFloor) / 10 + bandBase} within its ten-point band.
     *
     * @return the grade point, or {@code NaN} when the record must not be counted
     */
    private static double gradePoint(String grade)
    {
        // Five-level grading
        if(grade.equals("优"))
        {
            return 4.5;
        }
        if(grade.equals("良"))
        {
            return 3.5;
        }
        if(grade.equals("中"))
        {
            return 2.5;
        }
        if(grade.equals("及格"))
        {
            return 1.5;
        }
        if(grade.equals("缓考")||grade.equals("不及格"))
        {
            return Double.NaN;
        }
        // Percentage grading
        double mark;
        try {
            mark=Double.parseDouble(grade);
        } catch (NumberFormatException e) {
            return Double.NaN; // unrecognised grade text
        }
        // Each band is bounded only from below, so fractional marks such as 89.5 land in
        // the 80s band instead of slipping between "<=89" and ">=90" and scoring zero.
        if(mark>=90)
        {
            return (mark-90)/10+4.0;
        }
        if(mark>=80)
        {
            return (mark-80)/10+3.0;
        }
        if(mark>=70)
        {
            return (mark-70)/10+2.0;
        }
        if(mark>=60)
        {
            return (mark-60)/10+1.0;
        }
        return Double.NaN; // below 60
    }

    /**
     * Executes {@code request} and streams the response body to the named file and/or
     * the in-memory buffer. The response and both streams are closed on every path, and
     * the file is only opened once the response body is available.
     *
     * @param request  request to execute with the shared client
     * @param fileName file to write the body to, or {@code null} for none
     * @param memory   buffer to append the body to, or {@code null} for none
     */
    private void transfer(HttpUriRequest request,String fileName,ByteArrayOutputStream memory) throws IOException
    {
        try (CloseableHttpResponse res=httpClient.execute(request))
        {
            HttpEntity he=res.getEntity();
            if(he==null)
            {
                return; // response carries no body
            }
            // A null resource is permitted in try-with-resources and is simply not closed.
            try (InputStream in=he.getContent();
                 FileOutputStream file=(fileName==null)?null:new FileOutputStream(fileName))
            {
                byte[] buffer=new byte[1024];
                int count;
                while((count=in.read(buffer))!=-1)
                {
                    if(file!=null)
                    {
                        file.write(buffer, 0, count);
                    }
                    if(memory!=null)
                    {
                        memory.write(buffer, 0, count);
                    }
                }
            }
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息