您的位置:首页 > 编程语言 > Python开发

【Python爬虫】requests爬取BiliBili(B站)视频

2020-06-01 05:10 357 查看
import json
import os
import re

import requests

class BiliBiliSpider():
    """Download the media streams referenced by a BiliBili video page.

    The page HTML is fetched once in the constructor. ``Downloads_videos``
    then extracts the stream URLs embedded in that HTML and saves each one as
    ``./BiliBili_videos_<av_num>/video-<n>.mp4``.
    """

    _USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
    # (connect, read) timeouts in seconds, so a stalled server cannot hang
    # the script forever.
    _TIMEOUT = (10, 60)
    # Bytes read from the network per iteration while streaming to disk.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self, av_num):
        """Fetch the video page for *av_num*.

        Args:
            av_num: Video identifier appended to ``https://www.bilibili.com/``.

        Raises:
            requests.RequestException: If the page request fails or times out.
        """
        self.av_num = av_num
        self.url = 'https://www.bilibili.com/'+self.av_num
        self.headers = {
            'User-Agent': self._USER_AGENT
        }
        self.html_response = requests.get(
            url=self.url, headers=self.headers, timeout=self._TIMEOUT
        )

    @staticmethod
    def _decode_json_string(raw):
        """Return *raw* with JSON string escapes (``\\u0026``, ``\\/``) decoded."""
        try:
            return json.loads('"' + raw + '"')
        except ValueError:
            # Not a valid JSON string body; keep the text exactly as captured.
            return raw

    @staticmethod
    def _extract_urls(html):
        """Return the stream URLs embedded in *html*, in document order.

        ``"base_url"`` entries are preferred; ``"url"`` entries are used only
        when no ``"base_url"`` entry is present.
        """
        urls = re.findall(pattern=r'"base_url":"(.*?)"', string=html)
        if not urls:
            urls = re.findall(pattern=r'"url":"(.*?)"', string=html)
        # The values are captured from JSON text, so they are still escaped.
        return [BiliBiliSpider._decode_json_string(url) for url in urls]

    def Downloads_videos(self):
        """Save every stream found in the fetched page to the download folder.

        Does nothing except report the status when the page request did not
        return HTTP 200. A failure on one stream is printed and the remaining
        streams are still attempted.
        """
        if self.html_response.status_code != 200:
            print('请求页面失败,状态码:{}'.format(self.html_response.status_code))
            return

        api_headers = {
            'Range': 'bytes=0-999999999999999',
            'Referer': 'https://www.bilibili.com/video/ShouHuWoMenZuiHaoDePiliPili',
            'User-Agent': self._USER_AGENT
        }
        urls = self._extract_urls(self.html_response.text)
        dir_path = './BiliBili_videos_'+self.av_num
        print('下载路径:'+dir_path)
        # exist_ok: running the script twice must not crash on the folder.
        os.makedirs(dir_path, exist_ok=True)
        print('URL总数:{}'.format(len(urls)))

        for index, url in enumerate(urls, start=1):
            file_path = dir_path + '/video-' + str(index) + '.mp4'
            try:
                print('正在通过URL请求数据:'+url)
                with requests.get(url=url, headers=api_headers, stream=True,
                                  timeout=self._TIMEOUT) as response:
                    # Fail before creating the file so an HTTP error page is
                    # never saved as a video.
                    response.raise_for_status()
                    written = 0
                    with open(file_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=self._CHUNK_SIZE):
                            f.write(chunk)
                            written += len(chunk)
                # Report the bytes actually written; Content-Length may be
                # absent (e.g. chunked transfer encoding).
                print('写入完成,数据大小: {} bytes'.format(written))
            except (requests.RequestException, OSError) as e:
                # Best effort: report the failure and continue with the rest.
                print(e)

if __name__ == '__main__':
    # Script entry point: replace the placeholder id with a real one, then
    # fetch the page and download every stream it references.
    spider = BiliBiliSpider(av_num='av*********')
    spider.Downloads_videos()

爬取到的视频流和音频流是分离的两个文件,需要用 ffmpeg 合并($1、$2 为下载得到的两个文件,$3 为合并后的输出文件):

ffmpeg -i $1 -i $2 -vcodec copy -acodec copy $3
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: