您的位置:首页 > 编程语言 > Python开发

Python实例:抓取网易云课堂搜索数据(POST 请求、JSON 响应)并保存到数据库

2017-05-30 16:35 591 查看
  本实例抓取网易云课堂中以‘java’为关键字的搜索结果。经仔细查看,该搜索请求的方式为 POST,返回的结果为 JSON 数据。

具体实现代码如下:

import requests
import json
import pymysql
# Search endpoint and the header announcing a JSON request body.
url = 'http://study.163.com/p/search/studycourse.json'
headers = {'content-type': 'application/json'}

# Counters filled in while scraping.
totlePage = 0   # total number of result pages
test = 0        # running count of rows read so far

# Open the database connection and a cursor used for all statements.
conn = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    passwd='123456',
    db='test',
    charset='utf8',
)
cur = conn.cursor()

def getData(count):
    """Fetch one page of search results for the keyword 'java'.

    POSTs a JSON-encoded query to the module-level ``url`` using the
    module-level ``headers`` and decodes the JSON response body.

    Args:
        count: Page number, sent as the ``pageIndex`` field.

    Returns:
        The decoded JSON response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the response status is 4xx/5xx.
        ValueError: If the response body is not valid JSON.
    """
    payload = {
        'pageIndex': count,        # page number is the only variable field
        'pageSize': '50',
        'keyword': 'java',
        'searchTimeType': '-1',
        'orderType': '5',
        'priceType': '-1',
    }
    # Without a timeout a stalled connection would block the script forever.
    req = requests.post(url, data=json.dumps(payload), headers=headers,
                        timeout=10)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    req.raise_for_status()
    return json.loads(req.text)

# Start from a clean slate: drop any previous copy of the table.
cur.execute("DROP TABLE IF EXISTS neteasy")

# Table definition, split across adjacent literals that Python joins into
# a single statement string.
sqlc = (
    "create table neteasy(id int(5),title varchar(50) CHARACTER SET utf8 COLLATE utf8_general_ci,"
    "provider varchar(30) CHARACTER SET utf8 COLLATE utf8_general_ci,price float(10),"
    "learnercount int(5)) CHARACTER SET utf8 COLLATE utf8_general_ci"
)
cur.execute(sqlc)           # create the table

# Walk every result page and store one row per course in `neteasy`.
# The cursor and connection are released in `finally`, so they are closed
# whether results were found, nothing matched, or a request/insert failed.
sqli = 'insert into neteasy values(%s,%s,%s,%s,%s)'
try:
    # Fetch page 1 once: its 'list' shows whether anything matched and its
    # 'query' carries the total page count.
    first_page = getData(1)['result']
    final = first_page['list']
    if final is not None:
        totlePage = first_page['query']['totlePageCount']
        for j in range(1, totlePage + 1):          # loop over pages
            if j > 1:
                # Page 1 is already in hand; only later pages are requested.
                final = getData(j)['result']['list']
            # `or []` tolerates a page whose list comes back as null.
            for i, item in enumerate(final or [], start=1):
                title = item['productName']
                provider = item['provider']
                # Prefer the discounted price when one is present.
                price = item['discountPrice']
                if price is None:
                    price = item['originalPrice']
                learners = item['learnerCount']
                if learners is None:
                    learners = 0
                print('当前正在读取第'+str(j)+'页的第'+str(i)+'条数据...')
                test += 1                          # doubles as the row id
                cur.execute(sqli, (test, title, provider, price, learners))
        print('保存完毕!共'+str(test)+'条数据')
        conn.commit()
    else:
        print('没有查询结果,请换个关键词试试!')
finally:
    cur.close()
    conn.close()



                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: