您的位置:首页 > 其它

天猫淘宝评论数据抓取

2017-12-07 19:13 375 查看
import requests
import re,json
import pandas

class base():
    """Scraper for paginated Taobao / Tmall review endpoints that return JSONP.

    Parsed reviews are appended, as dicts with the keys '昵称' and '评论', to
    a module-level list named ``info_list``. The calling script must define
    that list before ``taobao_comment`` / ``tianmao_comment`` are invoked.
    """

    # Seconds to wait for the server before giving up on a request.
    timeout = 10

    # Captures the span from the first '{' to the last '}' of the response.
    _JSON_BODY = re.compile(r".*?({.*}).*", re.S)

    def __init__(self, url):
        """Store the URL prefix; a page number is appended to build each page URL."""
        self.url = url

    def all_url(self, first_page=1, last_page=99):
        """Return the page URLs for ``first_page`` through ``last_page`` inclusive.

        The defaults reproduce the previously hard-coded pages 1-99.
        """
        return [self.url + str(page) for page in range(first_page, last_page + 1)]

    def loads_jsonp(self, _jsonp):
        """Decode the JSON object embedded in a JSONP (or bare fragment) string.

        Returns:
            The parsed value of the text between the first '{' and last '}'.

        Raises:
            ValueError: if no ``{...}`` span is found, the span is not valid
                JSON, or ``_jsonp`` is not a string.
        """
        try:
            return json.loads(self._JSON_BODY.match(_jsonp).group(1))
        except (AttributeError, TypeError, ValueError) as err:
            # AttributeError: no match (match() returned None).
            # TypeError: non-string input. ValueError: malformed JSON.
            raise ValueError('Invalid Input') from err

    def url_req(self, url):
        """Fetch ``url`` over HTTP and return its decoded JSON payload."""
        # Without a timeout a single stalled connection would hang the crawl.
        content = requests.get(url, timeout=self.timeout).text
        return self.loads_jsonp(content)

    def taobao_comment(self, data):
        """Append nickname and text of each entry in ``data['comments']`` to ``info_list``."""
        for review in data['comments']:
            info_list.append({
                '昵称': review['user']['nick'],
                '评论': review['content'],
            })

    def tianmao_comment(self, data):
        """Append nickname and text of each entry in ``data['rateList']`` to ``info_list``."""
        for review in data['rateList']:
            info_list.append({
                '昵称': review['displayUserNick'],
                '评论': review['rateContent'],
            })

    def comment(self, url):
        """Fetch one page and record its reviews, choosing the parser by URL.

        URLs containing 'tmall' use the Tmall layout; all others use Taobao's.
        """
        data = self.url_req(url)
        if 'tmall' in url:
            self.tianmao_comment(data)
        else:
            self.taobao_comment(data)

def main(url):
    """Crawl every page URL derived from ``url`` and print the review count.

    Builds a ``base`` scraper for ``url``, feeds each of its page URLs to
    ``base.comment``, then prints the length of the module-level
    ``info_list``.
    """
    scraper = base(url)
    page_urls = scraper.all_url()
    for page_url in page_urls:
        scraper.comment(page_url)
    # NOTE(review): the source's indentation was lost; this assumes the count
    # is printed once after all pages rather than per page — confirm.
    print(len(info_list))

if __name__ == "__main__":
    # Review-list endpoint; the page number is appended after "currentPage=".
    # The text previously read "¤tPage=", which is "&currentPage=" corrupted
    # by HTML-entity decoding ("&curren" rendered as "¤").
    url = 'https://rate.tmall.com/list_detail_rate.htm?itemId=39258348512&spuId=250685252&sellerId=2106913388&order=3&currentPage='
    # Module-level accumulator that the base.*_comment methods append to.
    info_list = []
    main(url)
    # One row per collected review dict, written without the index column.
    df = pandas.DataFrame(info_list)
    df.to_excel('comments.xlsx', index=False)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: