您的位置:首页 > 编程语言 > Python开发

python 网络爬虫-批量打包下载小说

2019-06-15 21:39 501 查看
import urllib.request
import re
import os
import time
# Fetch the novel's table-of-contents page; the site serves GBK-encoded HTML.
mulu = 'https://www.9dxs.com/2/2308/index.html'
# 'with' closes the HTTP response deterministically (original leaked it).
with urllib.request.urlopen(mulu) as response:
    html = response.read().decode('gbk')
def get_zhang(lianjie, biaoti):
    """Download one chapter page and return its plain-text body.

    Parameters
    ----------
    lianjie : str
        Chapter path relative to the book's base URL (e.g. '123.html').
    biaoti : str
        Chapter title; accepted for interface compatibility, unused here.

    Returns
    -------
    str
        Chapter text with HTML tags stripped and ``&nbsp;`` entities
        replaced by two spaces, or ``''`` when the page does not match
        the expected layout (the original returned ``None`` in that
        case, which crashed ``baocun`` on string concatenation).
    """
    zhang = 'https://www.9dxs.com/2/2308/' + lianjie
    # Close the HTTP response deterministically (original leaked it).
    with urllib.request.urlopen(zhang) as response:
        html = response.read().decode('gbk')
    # Capture everything between the content <div> and the chapter pager.
    pattern = re.compile(
        r'(<div id="content" class="content">)(.*?)(</div>.*<div class="chapterpage">)',
        re.S,
    )
    match = pattern.search(html)
    if match is None:
        return ''
    text = re.sub(r'<.*?>', '', match.group(2))
    text = re.sub(r'&nbsp;', '  ', text)
    return text
def baocun(biaoti,zhengwen):
fo = open('帝临鸿蒙.txt', "a+")         #打开小说文件
fo.write('\r\n' + biaoti + '\r\n'+zhengwen)
fo.close()
# Parse every (link, title) pair from the table of contents, then download
# and save each chapter in order.
pattern = re.compile(r'<li><a href="(.*?)">(.*?)</a></li>')
mulu = pattern.findall(html)
for zhang in mulu:
    biaoti = zhang[1]
    zhengwen = get_zhang(zhang[0], zhang[1])
    baocun(biaoti, zhengwen)
    # Throttle requests so we don't put load on the site (original intent:
    # "不要太快防止给人家造成负担").
    time.sleep(5)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: