
Python Scrapy crawler: configuring pipelines.py to store scraped data in MySQL

2017-08-18 13:44
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import MySQLdb

DBKWARGS = {'db': 'test', 'user': 'root', 'passwd': '',
            'host': 'localhost', 'use_unicode': True, 'charset': 'utf8'}

class TutorialPipeline(object):

    def __init__(self):
        # open one database connection for the lifetime of the pipeline
        try:
            self.con = MySQLdb.connect(**DBKWARGS)
        except Exception as e:
            print("Connect db error:", e)

    def process_item(self, item, spider):
        cur = self.con.cursor()
        sql = "insert into dmoz_book values(%s,%s,%s)"  # dmoz_book is the target table
        # join the extracted title, link and description lists into plain strings
        lis = (''.join(item["title"]), ''.join(item["link"]), ''.join(item["desc"]))
        try:
            cur.execute(sql, lis)
        except Exception as e:
            print("Insert error:", e)
            self.con.rollback()
        else:
            self.con.commit()
        cur.close()
        return item

    def __del__(self):
        try:
            self.con.close()
        except Exception as e:
            print("Close db error:", e)

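As the header comment says, the pipeline also has to be registered in the project's settings.py, otherwise process_item is never called. A minimal sketch, assuming the project package is called tutorial (adjust the module path to your own project) and that a dmoz_book table with three string columns already exists in the test database:

# settings.py (sketch): enable the pipeline so Scrapy runs it for every item.
# The package name "tutorial" is an assumption -- use your project's package name.
ITEM_PIPELINES = {
    'tutorial.pipelines.TutorialPipeline': 300,  # lower numbers run earlier
}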
Of course, it can also be written more concisely:

import MySQLdb

# This block can instead be placed in settings.py
# database connection parameters
# DBKWARGS = {'db': 'ippool', 'user': 'root', 'passwd': 'toor',
#             'host': 'localhost', 'use_unicode': True, 'charset': 'utf8'}

class CollectipsPipeline(object):

    def process_item(self, item, spider):

        # read the connection parameters from the project settings
        DBKWARGS = spider.settings.get('DBKWARGS')
        con = MySQLdb.connect(**DBKWARGS)
        cur = con.cursor()
        sql = ("insert into proxy(IP,PORT,TYPE,POSITION,SPEED,LAST_CHECK_TIME) "
               "values(%s,%s,%s,%s,%s,%s)")
        lis = (item['IP'], item['PORT'], item['TYPE'], item['POSITION'], item['SPEED'],
               item['LAST_CHECK_TIME'])
        try:
            cur.execute(sql, lis)
        except Exception as e:
            print("Insert error:", e)
            con.rollback()
        else:
            con.commit()
        cur.close()
        con.close()
        return item
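Because this version reads the connection parameters with spider.settings.get('DBKWARGS'), they must be defined in settings.py, as the commented-out block above suggests. A rough sketch, assuming the project package is called collectips (the ippool database and credentials are the ones from the comment above):

# settings.py (sketch): connection parameters plus pipeline registration.
# The package name "collectips" is an assumption; adjust it to your project.
DBKWARGS = {'db': 'ippool', 'user': 'root', 'passwd': 'toor',
            'host': 'localhost', 'use_unicode': True, 'charset': 'utf8'}

ITEM_PIPELINES = {
    'collectips.pipelines.CollectipsPipeline': 300,
}

Note that this shorter pipeline opens and closes a fresh connection for every item, which is simpler but slower than the first version's single long-lived connection; in both versions the values are passed to cur.execute(sql, lis) as parameters, so MySQLdb handles the escaping.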