
Storing Scrapy data in MySQL

A minimal example: a CrawlSpider follows Sina news article links, extracts each page's title and meta keywords, and writes them to a MySQL table through a pymysql-based item pipeline.

#spider.py
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from Cwpjt.items import CwpjtItem

class FulongSpider(CrawlSpider):
    name = 'fulong'
    allowed_domains = ['sina.com.cn']
    start_urls = ['http://sina.com.cn/']
    # Example of a target article URL:
    # http://news.sina.com.cn/c/2017-05-09/doc-ifyeycte9324112.shtml

    rules = (
        Rule(LinkExtractor(allow=('.*?/[0-9]{4}.[0-9]{2}.[0-9]{2}.doc-.*?shtml'),
                           allow_domains=('sina.com.cn')),
             callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        i = CwpjtItem()
        # Grab the page title and the meta keywords of the article
        i['name'] = response.xpath('/html/head/title/text()').extract()
        i['kws'] = response.xpath('/html/head/meta[@name="keywords"]/@content').extract()
        return i
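The Rule's allow pattern is what restricts the crawl to dated article pages like the sample URL above. As a quick illustrative check (not part of the original post), the pattern can be tested directly:

import re

# Sanity check: the allow pattern matches the dated Sina article URL format.
pattern = '.*?/[0-9]{4}.[0-9]{2}.[0-9]{2}.doc-.*?shtml'
url = 'http://news.sina.com.cn/c/2017-05-09/doc-ifyeycte9324112.shtml'
print(bool(re.search(pattern, url)))  # True -> this link would be followed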


#pipelines.py
import pymysql

class CwpjtPipeline(object):
    def __init__(self):
        # Connect to a local MySQL instance; the database mydb must already exist
        self.conn = pymysql.connect(host='127.0.0.1', user='root',
                                    passwd='123456', db='mydb')
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        name = item['name'][0]
        kws = item['kws'][0]
        # Parameterized query: pymysql handles quoting and escaping of the values
        sql = "INSERT INTO hehe(title, kws) VALUES (%s, %s)"
        self.cursor.execute(sql, (name, kws))
        self.conn.commit()
        return item

    def close_spider(self, spider):
        self.conn.close()
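The pipeline assumes a table named hehe with title and kws columns already exists in mydb. The original post never shows the schema; a one-time setup sketch, with assumed column types, could look like this:

import pymysql

# Assumed schema -- the post does not show it, so the column types are guesses.
conn = pymysql.connect(host='127.0.0.1', user='root', passwd='123456', db='mydb')
with conn.cursor() as cursor:
    cursor.execute(
        "CREATE TABLE IF NOT EXISTS hehe ("
        "  id INT AUTO_INCREMENT PRIMARY KEY,"
        "  title VARCHAR(255),"
        "  kws VARCHAR(255)"
        ") DEFAULT CHARSET=utf8mb4"
    )
conn.commit()
conn.close()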


#items.py
import scrapy

class CwpjtItem(scrapy.Item):
    # define the fields for your item here
    name = scrapy.Field()   # page title
    kws = scrapy.Field()    # meta keywords
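For process_item to be called at all, the pipeline must also be enabled in the project settings. The post omits this step; assuming the project name Cwpjt from the spider's import, the usual entry is:

# settings.py (excerpt)
# Register the pipeline; 300 is its order relative to other pipelines (lower runs first).
ITEM_PIPELINES = {
    'Cwpjt.pipelines.CwpjtPipeline': 300,
}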