您的位置:首页 > 编程语言 > Python开发

Python学习:爬取豆瓣电影名称及评分

2017-12-16 07:42 507 查看
import requests
from bs4 import BeautifulSoup
import bs4
import re
def getHTMLText(url: str, timeout: float = 10) -> str:
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the crawl forever.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or a non-2xx HTTP status).
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into an exception so they take the
        # same failure path as network errors.
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "nothing to parse".
        # Only request-related errors are swallowed; KeyboardInterrupt
        # and programming errors still propagate.
        return ""
    # Use the encoding detected from the body rather than the one
    # declared in the HTTP headers.
    r.encoding = r.apparent_encoding
    return r.text

def fillUnivList(ulist: list, rlist: list, html: str) -> None:
    """Extract movie titles and ratings from a listing page.

    Every ``<div class="pl2">`` element in *html* is treated as one movie
    entry. The first whitespace-separated token of its first link text is
    appended to *ulist*, and the text of its ``rating_nums`` span (or
    ``"无评价"`` when there is no such span) is appended to *rlist*. Each
    extracted pair is also printed.

    Entries without a link, or whose link text is empty, are skipped so
    the two lists always stay aligned.

    Args:
        ulist: List that receives the movie titles (mutated in place).
        rlist: List that receives the ratings (mutated in place).
        html: HTML source of the page; an empty string yields no entries.
    """
    soup = BeautifulSoup(html, "html.parser")
    for tg in soup.find_all("div", "pl2"):
        link = tg.find("a")
        if link is None:
            continue
        words = link.text.split()
        if not words:
            continue
        title = words[0]

        # Look the rating span up once and test the result directly, so a
        # span that merely contains "nums" in its class cannot lead to a
        # None dereference.
        rate = tg.find("span", class_="rating_nums")
        rating = rate.text if rate is not None else "无评价"

        ulist.append(title)
        rlist.append(rating)
        # Print the values just appended instead of indexing from 0, which
        # was wrong whenever the caller passed in non-empty lists.
        print("{}  :   {}".format(title, rating))

def main():
    """Crawl the tag listing pages and print every title/rating pair.

    Pages are requested with ``start`` offsets 0, 20, ..., 980. Each page
    is parsed into fresh lists that are discarded after printing.
    """
    for offset in range(0, 981, 20):
        titles = []
        ratings = []
        page_url = (
            "https://movie.douban.com/tag/中国电影?start="
            + str(offset)
            + "&type=T"
        )
        fillUnivList(titles, ratings, getHTMLText(page_url))

# Start the crawl at module load; there is no __main__ guard, so importing
# this file also runs it.
main()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python 豆瓣 url