您的位置:首页 > 编程语言 > Python开发

【python脚本】-刷CSDN博客流量

2016-09-02 17:26 447 查看
本脚本可以通过打开CSDN博客页面,来增加博客访问量。写此脚本纯粹是为了练手,想要增加访问量,写出高质量的文章才是王道。脚本如下:

#!/usr/bin/python
# -*- coding: utf-8 -*-
import urllib2
import re
import time
from bs4 import BeautifulSoup

# Root URL of the blog site; account names and article paths are appended to it.
baseUrl = 'http://blog.csdn.net'

def getPage(url):
    """Download a page and return its raw body.

    The request is sent with a browser-like User-Agent header instead of
    urllib2's default one.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body exactly as read from the connection.

    Raises:
        urllib2.URLError: Propagated from urllib2.urlopen when the request
            cannot be completed.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}  # pose as a browser
    req = urllib2.Request(url, headers=headers)
    myResponse = urllib2.urlopen(req)
    try:
        return myResponse.read()
    finally:
        # urllib2 responses are not context managers on Python 2, so the
        # connection is released explicitly, even when read() fails.
        myResponse.close()

def getNumber(url):
    """Return the number of article-list pages of a blog, as a string.

    The count is read from the text of the element whose id is "papelist"
    on the page at *url*.

    Args:
        url: Address of the blog's front page.

    Returns:
        The page count as a string of digits (callers convert it with
        int()). '1' is returned when the page has no "papelist" element.
    """
    myPage = getPage(url)
    # Parse the downloaded HTML with the standard-library backed parser.
    soup = BeautifulSoup(myPage, 'html.parser', from_encoding='utf-8')
    papeList = soup.find(id="papelist")
    if papeList is None:
        # Without the pagination element there is nothing to parse; fall back
        # to a single page instead of failing with AttributeError.
        # NOTE(review): presumably single-page blogs omit the element -
        # confirm against the live markup.
        return '1'
    numberList = papeList.contents[1].string.strip()
    # The text looks like " 97条 共7页" (article count, then page count).
    # Splitting on runs of whitespace tolerates several spaces between them.
    numberStr = re.split(r'\s+', numberList)[1]
    # Drop the leading and trailing character around the digits.
    return numberStr[1:-1]

def getArticleLink(account):
    """Collect the title and URL of every article listed for a blog account.

    Each article-list page of the account is downloaded in turn and every
    <h1> element on it is read for a link.

    Args:
        account: Blog account name, appended to the site root URL.

    Returns:
        A dict mapping article title to absolute article URL. Articles that
        share a title overwrite each other because the title is the key.
    """
    blog_root = baseUrl + '/' + account
    total_pages = int(getNumber(blog_root))
    articles = {}
    for page_index in range(1, total_pages + 1):
        # URL of one page of the account's article list.
        list_url = blog_root + '/article/list/' + str(page_index)
        markup = getPage(list_url)
        parsed = BeautifulSoup(markup, 'html.parser', from_encoding='utf-8')
        for heading in parsed.find_all('h1'):
            # The link is the first child of the heading's second child.
            anchor = heading.contents[1].contents[0]
            href = anchor['href'].strip()    # site-relative article link
            title = anchor.string.strip()    # article title
            articles[title] = baseUrl + href
    return articles

def openArticle(account, number, isAll, urlDic):
    """Request blog articles repeatedly to raise their visit counters.

    Every article in the chosen set is requested once per pass, with a
    browser-like User-Agent and a search-engine Referer header.

    Args:
        account: Blog account name; only used when isAll is true.
        number: How many passes to make over the article set, i.e. how many
            times each article is requested.
        isAll: When true, visit every article returned by
            getArticleLink(account); otherwise visit the entries of urlDic.
        urlDic: Dict mapping article title to article URL, used when isAll
            is false.

    Returns:
        None. Progress (title and pass index) and any caught errors are
        printed.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    refererData = 'https://www.baidu.com/s?wd=%E3%80%90Python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E3%80%91-%E8%87%AA%E5%8A%A8%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6%E8%84%9A%E6%9C%AC&rsv_spt=1&rsv_iqid=0xd0c448a5000805ae&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&oq=%E3%80%90Python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E3%80%91-%E8%87%AA%E5%8A%A8%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6%E8%84%9A%E6%9C%AC&rsv_t=b892PEG45vPXxkNVtED7%2BXf%2BFk6gOTgA7wSkEm4698UcnIMg027x%2FbVgkQ%2BaCjgAe9DF&rsv_pq=ad3cf723000a6ce2&rsv_sug=1'
    # Pose as a browser that arrived from a search result page.
    headers = {'User-Agent': user_agent, 'Referer': refererData}
    if isAll:
        dic = getArticleLink(account)
    else:
        dic = urlDic
    count = 0
    while count < number:
        for key in dic:
            try:
                url = dic[key]
                print(key + '\t' + str(count))
                # No data argument: urllib2 then issues a GET, like a normal
                # page view. Passing data (even '') would turn it into a POST.
                request = urllib2.Request(url, headers=headers)
                rec = urllib2.urlopen(request)
                # The body is not needed; release the connection right away.
                rec.close()
                time.sleep(10)
            except Exception as e:
                # Best effort: report the failure and move on to the next
                # article after a longer pause, since the error may be caused
                # by requesting too frequently.
                print(e)
                time.sleep(120)
        count = count + 1

if __name__ == "__main__":
    # Articles to visit when isAll below is switched off.
    urlDic = {
        "【Spring学习笔记七】-Spring MVC基本配置和实例":
            "http://blog.csdn.net/kevin_zhai/article/details/52368420",
        "【Spring学习笔记六】-Spring MVC框架":
            "http://blog.csdn.net/kevin_zhai/article/details/52279160",
    }
    account = "Kevin_zhai"  # blog account whose articles are visited
    number = 1              # number of visits per article
    isAll = True            # visit every article of the account?
    openArticle(account, number, isAll, urlDic)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: