【python脚本】-刷CSDN博客流量
2016-09-02 17:26
447 查看
本脚本可以通过打开CSDN博客页面,来增加博客访问量。写此脚本纯粹是为了练手,想要增加访问量,写出高质量的文章才是王道。脚本如下:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import urllib2
import re
import time
from bs4 import BeautifulSoup
baseUrl = 'http://blog.csdn.net'
'''
抓取页面信息
'''
def getPage(url):
    """Fetch *url* and return the raw response body as a str.

    Sends a browser-like User-Agent header because CSDN rejects
    plain script requests.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}  # masquerade as a browser
    req = urllib2.Request(url, headers=headers)
    myResponse = urllib2.urlopen(req)
    try:
        return myResponse.read()
    finally:
        # Original never closed the response, leaking the socket.
        myResponse.close()
'''
得到文章分页数目
'''
def getNumber(url):
    """Return the number of article-list pages, as a string of digits.

    Locates the pager element (id="papelist"), whose text looks like
    u" 97条 共7页".  The second whitespace-separated token is u"共7页";
    dropping its first and last characters leaves the page count.
    """
    html = getPage(url)
    soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')
    pager = soup.find(id="papelist")
    pager_text = pager.contents[1].string.strip()
    # re.split on \s+ handles runs of multiple spaces between tokens.
    page_token = re.split(r'\s+', pager_text)[1]
    return page_token[1:-1]
'''
得到所有文章标题和链接
'''
def getArticleLink(account):
    """Collect every article of *account* as a dict of title -> absolute URL.

    Walks each page of the blog's article list and scrapes the <h1>
    entries for the article anchor.
    """
    home_url = baseUrl + '/' + account
    total_pages = int(getNumber(home_url))
    links = {}
    for page in range(1, total_pages + 1):
        # One page of the article list.
        list_url = home_url + '/article/list/' + str(page)
        soup = BeautifulSoup(getPage(list_url), 'html.parser', from_encoding='utf-8')
        for heading in soup.find_all('h1'):
            anchor = heading.contents[1].contents[0]  # the article <a> tag
            title = anchor.string.strip()
            links[title] = baseUrl + anchor['href'].strip()
    return links
'''
打开博客文章刷流量
'''
def openArticle(account,number,isAll,urlDic):
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
refererData = 'https://www.baidu.com/s?wd=%E3%80%90Python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E3%80%91-%E8%87%AA%E5%8A%A8%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6%E8%84%9A%E6%9C%AC&rsv_spt=1&rsv_iqid=0xd0c448a5000805ae&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&oq=%E3%80%90Python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E3%80%91-%E8%87%AA%E5%8A%A8%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6%E8%84%9A%E6%9C%AC&rsv_t=b892PEG45vPXxkNVtED7%2BXf%2BFk6gOTgA7wSkEm4698UcnIMg027x%2FbVgkQ%2BaCjgAe9DF&rsv_pq=ad3cf723000a6ce2&rsv_sug=1'
data = ''
headers = {'User-Agent' : user_agent, 'Referer' : refererData} #伪装成浏览器访问
count = 0
if isAll:
dic = getArticleLink(account)
else:
dic = urlDic
while count < number:
for key in dic:
try:
url = dic[key]
print key + '\t' + str(count)
request = urllib2.Request(url, data, headers)
rec = urllib2.urlopen(request)
time.sleep(10)
except Exception, e:
print e
time.sleep(120) #发生异常,可能是访问太频繁,多等一会
continue
count = count+1
if __name__ == "__main__":
    account = "Kevin_zhai"  # CSDN account whose blog is visited
    number = 1  # rounds of visits per article set
    isAll = True  # True: scrape and visit all articles of the account
    # Fallback title -> URL map, used only when isAll is False.
    urlDic = {"【Spring学习笔记七】-Spring MVC基本配置和实例":"http://blog.csdn.net/kevin_zhai/article/details/52368420","【Spring学习笔记六】-Spring MVC框架":"http://blog.csdn.net/kevin_zhai/article/details/52279160"}
    openArticle(account,number,isAll,urlDic)
#!/usr/bin/python
# -*- coding: utf-8 -*-
import urllib2
import re
import time
from bs4 import BeautifulSoup
baseUrl = 'http://blog.csdn.net'
'''
抓取页面信息
'''
def getPage(url):
    """Fetch *url* and return the raw response body as a str.

    Sends a browser-like User-Agent header because CSDN rejects
    plain script requests.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}  # masquerade as a browser
    req = urllib2.Request(url, headers=headers)
    myResponse = urllib2.urlopen(req)
    try:
        return myResponse.read()
    finally:
        # Original never closed the response, leaking the socket.
        myResponse.close()
'''
得到文章分页数目
'''
def getNumber(url):
    """Return the number of article-list pages, as a string of digits.

    Locates the pager element (id="papelist"), whose text looks like
    u" 97条 共7页".  The second whitespace-separated token is u"共7页";
    dropping its first and last characters leaves the page count.
    """
    html = getPage(url)
    soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')
    pager = soup.find(id="papelist")
    pager_text = pager.contents[1].string.strip()
    # re.split on \s+ handles runs of multiple spaces between tokens.
    page_token = re.split(r'\s+', pager_text)[1]
    return page_token[1:-1]
'''
得到所有文章标题和链接
'''
def getArticleLink(account):
    """Collect every article of *account* as a dict of title -> absolute URL.

    Walks each page of the blog's article list and scrapes the <h1>
    entries for the article anchor.
    """
    home_url = baseUrl + '/' + account
    total_pages = int(getNumber(home_url))
    links = {}
    for page in range(1, total_pages + 1):
        # One page of the article list.
        list_url = home_url + '/article/list/' + str(page)
        soup = BeautifulSoup(getPage(list_url), 'html.parser', from_encoding='utf-8')
        for heading in soup.find_all('h1'):
            anchor = heading.contents[1].contents[0]  # the article <a> tag
            title = anchor.string.strip()
            links[title] = baseUrl + anchor['href'].strip()
    return links
'''
打开博客文章刷流量
'''
def openArticle(account,number,isAll,urlDic):
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
refererData = 'https://www.baidu.com/s?wd=%E3%80%90Python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E3%80%91-%E8%87%AA%E5%8A%A8%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6%E8%84%9A%E6%9C%AC&rsv_spt=1&rsv_iqid=0xd0c448a5000805ae&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&oq=%E3%80%90Python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E3%80%91-%E8%87%AA%E5%8A%A8%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6%E8%84%9A%E6%9C%AC&rsv_t=b892PEG45vPXxkNVtED7%2BXf%2BFk6gOTgA7wSkEm4698UcnIMg027x%2FbVgkQ%2BaCjgAe9DF&rsv_pq=ad3cf723000a6ce2&rsv_sug=1'
data = ''
headers = {'User-Agent' : user_agent, 'Referer' : refererData} #伪装成浏览器访问
count = 0
if isAll:
dic = getArticleLink(account)
else:
dic = urlDic
while count < number:
for key in dic:
try:
url = dic[key]
print key + '\t' + str(count)
request = urllib2.Request(url, data, headers)
rec = urllib2.urlopen(request)
time.sleep(10)
except Exception, e:
print e
time.sleep(120) #发生异常,可能是访问太频繁,多等一会
continue
count = count+1
if __name__ == "__main__":
    account = "Kevin_zhai"  # CSDN account whose blog is visited
    number = 1  # rounds of visits per article set
    isAll = True  # True: scrape and visit all articles of the account
    # Fallback title -> URL map, used only when isAll is False.
    urlDic = {"【Spring学习笔记七】-Spring MVC基本配置和实例":"http://blog.csdn.net/kevin_zhai/article/details/52368420","【Spring学习笔记六】-Spring MVC框架":"http://blog.csdn.net/kevin_zhai/article/details/52279160"}
    openArticle(account,number,isAll,urlDic)
相关文章推荐
- 【Python脚本】-爬虫得到CSDN博客的文章访问量和评论量
- CSDN博客专栏文章批量下载脚本[python实现]
- Python刷CSDN博客脚本v2.0
- CSDN博客专栏文章批量下载脚本[python实现]
- python访问Hive配置 - jmydream的专栏 - 博客频道 - CSDN.NET
- Python模块学习 ---- fileinput - 成长的点滴,记录与分享 - 博客频道 - CSDN.NET
- 第一个Python程序——博客自动访问脚本
- 查看CSDN博客流量
- 自动下载并保存博客 Python脚本
- python fork - 挨踢骚客的旅途 - 博客频道 - CSDN.NET
- Python_博客自动访问脚本
- 2009.09.07 CSDN博客公告区取消使用脚本的功能! 札记
- CSDN收藏备份Python脚本
- 一个监视CSDN论坛押宝游戏状态的python脚本
- python manage.py runserver 0.0.0.0:8000 - zkz的专栏 - 博客频道 - CSDN.NET
- Python里的string 和 unicode (一) - 肥三的专栏 - 博客频道 - CSDN.NET
- CentOS安装python2.6 - 子剑灵学技术 - 博客频道 - CSDN.NET
- 【微博客】CSDN开了python专题
- 2009.09.07 CSDN博客公告区取消使用脚本的功能! 札记
- python之强大的日志模块 - 竹叶青 的专栏 - 博客频道 - CSDN.NET