您的位置:首页 > 编程语言 > Python开发

python爬虫技术实现图片提取

2017-04-30 17:38 741 查看
# -*- coding: UTF-8 -*-
import urllib2,urllib,os
'''
Created on 2017年4月16日

@author: 恋蝶
'''
# Taobao "mm" gallery scraper (Python 2: uses urllib2 and urllib.urlretrieve).
#
# For every listing page it locates each model link, opens that model's
# profile page and saves every image whose address ends in "tstar.jpg" into a
# per-model folder below `wjming`.

# Listing endpoint; the page number is appended.
mmurl = "https://mm.taobao.com/json/request_top_list.htm?type=0&page="
# Profile endpoint; the model's user id is appended.
temp = "https://mm.taobao.com/self/aiShow.htm?spm=719.7763510.1998643336.1.pI7wQq&userId="
# Attribute prefix that precedes an image address inside an <img> tag.
T = '''src="'''
# Root output folder (Windows path, trailing separator included).
wjming = "E:\\Pythonpaqu\\Python\\picimg\\"

# Number of listing pages to walk (page numbers 0..15).
PAGE_COUNT = 16

# Markers used to cut ids and names out of the listing markup.
_USER_ID_MARK = "user_id="
_TARGET_MARK = '''target="_blank"'''
_NAME_OPEN = '''target="_blank">'''
_NAME_CLOSE = '''</a>'''
# How far past the start of the target attribute the display name is searched.
_NAME_WINDOW = 36

# Markers used to cut image addresses out of a profile page.
_IMG_MARK = '''img style="float:'''
_IMG_SUFFIX = "tstar.jpg"


def fetch(url):
    """Return the raw body of *url*, closing the connection afterwards.

    Errors raised by ``urllib2.urlopen`` / ``read`` propagate to the caller.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def iter_models(listing_html):
    """Yield ``(user_id, name)`` for each ``user_id=`` link in *listing_html*.

    The id is the text between ``user_id=`` and the next ``target="_blank"``
    minus its last two characters. The name is the text between
    ``target="_blank">`` and ``</a>`` when both occur within the search
    window; otherwise the name is the empty string. Scanning stops at the
    first ``user_id=`` that has no ``target="_blank"`` after it.
    """
    pos = listing_html.find(_USER_ID_MARK)
    while pos >= 0:
        id_start = pos + len(_USER_ID_MARK)
        target_pos = listing_html.find(_TARGET_MARK, pos)
        if target_pos < 0:
            # Without the closing marker the id cannot be delimited.
            break

        # NOTE(review): "- 2" presumably drops the closing quote and the
        # space in front of the target attribute -- confirm against live markup.
        user_id = listing_html[id_start:target_pos - 2]

        window = listing_html[id_start:target_pos + _NAME_WINDOW]
        name = ""
        name_start = window.find(_NAME_OPEN)
        if name_start >= 0:
            name_start += len(_NAME_OPEN)
            name_end = window.find(_NAME_CLOSE, name_start)
            if name_end >= 0:
                name = window[name_start:name_end]

        yield user_id, name
        # Resume after this link so the same id is not matched again.
        pos = listing_html.find(_USER_ID_MARK, target_pos)


def iter_image_urls(profile_html):
    """Yield the ``http:`` address of each ``...tstar.jpg`` image in *profile_html*.

    A candidate spans from an ``img style="float:`` marker to the next
    ``tstar.jpg``. Scanning stops when no further ``tstar.jpg`` exists, and a
    candidate without a ``src="`` prefix is skipped.
    """
    pos = profile_html.find(_IMG_MARK)
    while pos >= 0:
        end = profile_html.find(_IMG_SUFFIX, pos)
        if end < 0:
            break
        fragment = profile_html[pos + len(_IMG_MARK):end + len(_IMG_SUFFIX)]
        # rfind: if another tag sits between the marker and the suffix, the
        # last src=" is the one belonging to the tstar.jpg address.
        src_at = fragment.rfind(T)
        if src_at >= 0:
            yield "http:" + fragment[src_at + len(T):]
        pos = profile_html.find(_IMG_MARK, end)


def main():
    """Download the images of every model on listing pages 0..PAGE_COUNT-1."""
    for page in range(PAGE_COUNT):
        listing = fetch(mmurl + str(page))
        for seq, (user_id, name) in enumerate(iter_models(listing), 1):
            profile = fetch(temp + user_id)

            # Page number and per-page counter keep folder names unique even
            # when the extracted name is empty or repeated.
            folder = wjming + str(page) + name + str(seq)
            if not os.path.exists(folder):
                os.makedirs(folder)
            # NOTE(review): the pasted script had lost its indentation, so it
            # is unclear whether this progress line ran only on creation.
            print(folder)

            for number, image_url in enumerate(iter_image_urls(profile), 1):
                urllib.urlretrieve(
                    image_url,
                    os.path.join(folder, "tu" + str(number) + ".jpg"))


if __name__ == "__main__":
    main()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python 爬虫 淘女郎