您的位置:首页 > 数据库 > MySQL

高效爬虫,用executemany方法一次性插入MySQL多条数据(2),用字典格式保存数据并存入数据库

2020-06-08 05:13 549 查看

大家好,我是天空之城,今天给大家带来,用executemany方法一次性插入MySQL多条数据(2),用字典格式保存数据并存入数据库,注意executemany方法后面的%s写法,与上一篇不同。
有兴趣加qq群,纯学习,1098016198。
第一步建立表格。

import pymysql

# Step 1: create the `movies9` table that will hold the scraped Douban data.
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                       passwd='qwer123456', db='mydb', charset='utf8')
try:
    with conn.cursor() as cursor:
        # `if not exists` makes the script safe to re-run; the original
        # failed with "table already exists" on a second execution.
        sql = """create table if not exists movies9(
        m_id int primary key auto_increment,
        movie_title varchar (100) not null,
        movie_link varchar(200) null,
        movie_pingfen varchar(110) not null,
        movie_pinglun varchar(200) null
        )"""
        cursor.execute(sql)
finally:
    # Close the connection even if table creation fails (the original
    # leaked both cursor and connection on any exception).
    conn.close()

第二步,获取并插入数据。

import requests,lxml,pymysql
from lxml import etree
import csv
import smtplib
from email.mime.text import MIMEText
from email.header import Header

# Request headers: a Referer and a desktop User-Agent so the Douban
# server does not reject the scraper as an obvious bot.
header = {
'Referer': 'https://movie.douban.com/top250?start=1&filter=',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:46.0) Gecko/20100101 Firefox/46.0'}

# Page URL template; `{}` is filled with the 0-based item offset
# (25 films per page, so offsets are 0, 25, 50, ...).
url='https://movie.douban.com/top250?start={}&filter='
def getdata(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Raises requests.HTTPError on a 4xx/5xx status and
    requests.Timeout if the server does not answer within 10 seconds
    (the original had no timeout, so a stalled server hung forever).
    """
    response = requests.get(headers=header, url=url, timeout=10)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text

def getitem(source):
    """Parse one Douban top-250 page and return a list of row dicts.

    Each dict is keyed movie_title / movie_link / movie_pingfen /
    movie_pinglun to match the `movies9` columns, so the list can be
    passed straight to executemany with %(name)s placeholders.
    """
    html = etree.HTML(source)
    filmlist = html.xpath('//div[@class="info"]')
    list_all = []
    for film in filmlist:
        title = film.xpath('div[@class="hd"]/a/span[@class="title"][1]/text()')[0]
        link = film.xpath('div[@class="hd"]/a/@href')[0]
        pingfen = film.xpath('div[@class="bd"]/div[@class="star"]/span[@class="rating_num"]/text()')[0]
        # Not every film has a one-line quote; default to an empty string.
        pinglun_nodes = film.xpath('div[@class="bd"]/p/span[@class="inq"]/text()')
        pinglun = pinglun_nodes[0] if pinglun_nodes else ''

        list_all.append({
            'movie_title': title,       # 名称 -> name
            'movie_link': link,         # 链接 -> link
            'movie_pingfen': pingfen,   # 评分 -> rating
            'movie_pinglun': pinglun,   # 评论 -> review quote
        })
    return list_all

def save_data(data):
    """Insert all scraped rows into `movies9` in one executemany batch.

    *data* is a list of dicts keyed by column name; the named
    %(key)s placeholders map each dict to one inserted row.
    """
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                           passwd='qwer123456', db='mydb', charset='utf8')
    try:
        with conn.cursor() as cursor:
            sql = '''insert into movies9 (movie_title, movie_link, movie_pingfen,movie_pinglun) values
(%(movie_title)s,%(movie_link)s,%(movie_pingfen)s,%(movie_pinglun)s)'''
            cursor.executemany(sql, data)
        conn.commit()
    finally:
        # Always release the connection — the original leaked it
        # whenever executemany or commit raised.
        conn.close()

#这一部分注释的是写入csv表格的代码,有相似之处,方便对比,留在这里给大家学习
# def writedata(list_all):
#     with open('douban2.csv','w',newline="",encoding='utf-8') as file:
#
#         writer=csv.DictWriter(file,fieldnames=['title', 'link', 'pingfen', 'pinglun'])
#         writer.writeheader()
#         for each in list_all:
#             writer.writerow(each)

if __name__ == '__main__':
    # Crawl all 10 result pages (25 films each), accumulate the rows,
    # then insert everything into MySQL in a single batch.
    rows = []
    for page in range(10):
        page_url = url.format(page * 25)
        page_html = getdata(page_url)
        rows.extend(getitem(page_html))
    save_data(rows)

数据库截图

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: