您的位置:首页 > 编程语言 > Python开发

Python下载百度新歌100的代码

2011-07-09 23:54 645 查看
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn>
# License: GPLv2
# Author: oneleaf <oneleaf AT gmail.com>

import httplib
import re
import urllib
import os
import locale

def _extract_mp3_urls(html):
    """Pull candidate MP3 links out of a raw (undecoded) result page.

    ``html`` is the page body as a byte string.  Returns a list of links;
    each is decoded from GBK when possible and left as raw bytes otherwise.
    """
    urllist = []
    anchors = re.findall(br'http://220.181.27.54/m(.*)</a>', html)
    # Only every second match is inspected, exactly as the original
    # ``lineno += 2`` loop did (presumably each result is matched twice on
    # the page -- TODO confirm against a real response).
    for anchor in anchors[::2]:
        title = re.search(br'title=(.*)onclick', anchor)
        if not title:
            continue
        link = re.search(br'http(\S*)', title.group(0))
        if not link:
            continue
        mp3url = link.group(0)
        try:
            mp3url = mp3url.decode('gbk')
        except UnicodeDecodeError:
            # Best effort: keep the raw bytes when the link is not valid GBK.
            pass
        urllist.append(mp3url)
    return urllist


def getdownurl(url):
    """Fetch ``url`` from mp3.baidu.com and return the MP3 links found there.

    ``url`` is the request path sent in the GET line.  The connection is
    always closed, even when the request or the read fails.  Returns a
    (possibly empty) list of download links.
    """
    conn = httplib.HTTPConnection('mp3.baidu.com')
    try:
        conn.request("GET", url)
        html = conn.getresponse().read()
    finally:
        conn.close()
    return _extract_mp3_urls(html)

def _as_text(value, encoding):
    """Return ``value`` as text, decoding byte strings with ``encoding``."""
    if isinstance(value, bytes):
        # 'replace' keeps a single undecodable name from aborting the run.
        return value.decode(encoding, 'replace')
    return value


def downmp3(url, author, name, filelist):
    """Download one song unless a matching file is already present.

    ``url`` is the search path handed to ``getdownurl``; ``author`` and
    ``name`` build the target file name ``<author>-<name><ext>``;
    ``filelist`` holds the names already in the download directory (byte
    or text strings).

    Returns 1 when the song already exists or was downloaded, and 0 when
    every candidate link failed.
    """
    filename = author + "-" + name
    encoding = locale.getpreferredencoding()
    for entry in filelist:
        if _as_text(entry, encoding).startswith(filename):
            print(u"文件已经下载,忽略。")
            return 1

    for link in getdownurl(url):
        shown = _as_text(link, 'gbk')
        print(u"正在连接 " + shown)
        # The extension is taken to be the last four characters of the link.
        ext = shown[-4:]
        try:
            fp = urllib.URLopener().open(link)
            try:
                data = fp.read()
            finally:
                fp.close()
            # Build the target name fresh for every attempt so a failed
            # write cannot stack extensions onto the base name.
            with open(filename + ext, 'wb') as out:
                out.write(data)
        except Exception:
            # Deliberate best effort: any failure on this link just moves on
            # to the next one (Ctrl-C still propagates, unlike bare except).
            continue
        print(u"下载成功!")
        return 1
    return 0

if __name__ == "__main__":
    # Fetch the "new hits" chart page; close the connection even on failure.
    conn = httplib.HTTPConnection('list.mp3.baidu.com')
    try:
        conn.request("GET", '/list/newhits.html?id=1')
        # 'replace' keeps one malformed byte from aborting the whole run.
        html = conn.getresponse().read().decode('gbk', 'replace')
    finally:
        conn.close()

    listSentence = re.findall(r'<a href="http://mp3.baidu.com/m(.*)</a>', html)
    # Matches are consumed in pairs: the even entry supplies the search URL
    # and the song name, the following odd entry supplies the artist.
    # Stopping at len - 1 skips a trailing unpaired match instead of raising
    # IndexError.
    for lineno in range(0, len(listSentence) - 1, 2):
        song_anchor = listSentence[lineno]
        artist_anchor = listSentence[lineno + 1]
        url = re.search(r'(.*)target', song_anchor)
        name = re.search(r'blank>(.*)', song_anchor)
        author = re.search(r'blank>(.*)', artist_anchor)
        if not (url and name and author):
            # Markup did not have the expected shape; skip this pair.
            continue
        # Drop the trailing 8 characters: the 6-character "target" plus the
        # two characters before it (presumably the closing quote and a space).
        url = '/m' + url.group(0)[:-8]
        name = name.group(1)
        author = author.group(1)
        print(u"开始下载 %s %s" % (author, name))
        # Re-list the directory each time so files saved earlier in this run
        # are seen by the "already downloaded" check.
        filelist = os.listdir('.')
        if downmp3(url, author, name, filelist) == 0:
            print(u"下载 %s %s 失败!" % (author, name))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: