您的位置:首页 > 编程语言 > Python开发

Python + MySQL:抓取百度新闻的标题并存到数据库

2016-02-22 16:36 423 查看
#!/usr/bin/python
# -*- coding:utf-8 -*-
import urllib2

import re

import MySQLdb

class BaiDuNews:
    """Scrape headline titles from the Baidu News front page into MySQL.

    The three methods form a pipeline:
    ``saveDB(getContents(getPage()))``.
    """

    # Captures the inner text of the first <a> that follows every
    # ``<li class="hd...`` opening.  re.S lets ``.`` span newlines so the
    # <li> and its <a> may sit on different lines.  Compiled once here
    # instead of on every getContents() call.
    _TITLE_PATTERN = re.compile('<li class="hd.*?<a.*?>(.*?)</a>', re.S)

    def __init__(self):
        """Set the URL of the page that getPage() downloads."""
        self.baseurl = 'http://news.baidu.com/'

    def getPage(self):
        """Download ``self.baseurl`` and return its body decoded as GBK.

        Returns:
            The page HTML as a unicode string.

        Raises:
            Whatever ``urllib2.urlopen`` raises on network/HTTP failure, and
            ``UnicodeDecodeError`` if the body is not valid GBK.
        """
        request = urllib2.Request(self.baseurl)
        response = urllib2.urlopen(request)
        try:
            # NOTE(review): the charset is hard-coded; confirm the site still
            # serves GBK rather than UTF-8.
            return response.read().decode('gbk')
        finally:
            # Release the socket even when read()/decode() fails.
            response.close()

    def getContents(self, page):
        """Extract headline link texts from *page*.

        Each match is echoed to stdout and collected as a UTF-8 encoded
        byte string.

        Args:
            page: HTML of the news page as a unicode string.

        Returns:
            A list of UTF-8 byte strings, one per matched headline link, in
            document order.  Empty when nothing matches.
        """
        contents = []
        for item in self._TITLE_PATTERN.findall(page):
            # Parenthesised single-argument form prints identically under
            # the Python 2 print statement and the Python 3 print function.
            print(item)
            contents.append(item.encode('utf-8'))
        return contents

    def saveDB(self, contents):
        """Insert every entry of *contents* as a row of table ``baidunews``.

        Connects to the local MySQL server (database ``test``, user ``root``,
        empty password), inserts one row per entry and commits once at the
        end, so a failure part-way through stores nothing.

        Assumes ``baidunews`` already exists with two columns: an id that
        accepts NULL on insert (e.g. AUTO_INCREMENT) followed by a text
        column -- TODO confirm against the real schema.

        Args:
            contents: iterable of headline strings, as returned by
                getContents().

        Raises:
            ``MySQLdb.Error`` (or a subclass) on connection or SQL failure.
        """
        db = MySQLdb.connect(host='127.0.0.1', user='root', passwd='',
                             db='test', charset='utf8')
        try:
            cur = db.cursor()
            try:
                # Let the driver bind and escape the value: scraped titles
                # may contain quotes or backslashes, which would break (or
                # inject into) a statement assembled with string formatting.
                sql = 'INSERT INTO baidunews VALUES (NULL, %s)'
                for content in contents:
                    cur.execute(sql, (content,))
            finally:
                cur.close()
            db.commit()
        finally:
            # Always release the connection; closing without commit
            # discards any uncommitted inserts after an error.
            db.close()

# Run the pipeline: download the front page, extract the headline titles,
# then store them in the database.
news = BaiDuNews()
page = news.getPage()
titles = news.getContents(page)
news.saveDB(titles)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: