您的位置:首页 > 编程语言 > Python开发

python爬虫系列之爬京东手机数据

2015-06-23 21:50 781 查看
python抓京东手机数据

作者:vpoet

mail:vpoet_sir@163.com

#coding=utf-8

import urllib2

from lxml import etree

import re

# Scrapes the JD.com search results for mobile phones and prints one value per
# product price element, numbered from 1 on every page.
#
# Template for the search-result URL.  "%%" is a literal "%" escaped for the
# "%" formatting operator (the keyword is percent-encoded UTF-8); the single
# "%s" at the very end receives the page number.
# NOTE(review): the run of spaces inside "click=3-...655" is reproduced exactly
# as found.  It looks like a copy/paste artifact and sits after "#", i.e. in
# the URL fragment -- confirm before cleaning it up.
MAIN_URL = """http://search.jd.com/Search?keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&suggest=0#keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&qrst=1&ps=addr&rt=1&stop=1&sttr=1&cid3=655&click=3-                655&psort=3&page=%s"""

# XPath selecting the <strong> element inside every product's "p-price" div.
PRICE_XPATH = ("//*[@id='plist']/ul[@class='list-h clearfix']"
               "/li/div/div[@class='p-price']/strong")

# TODO: extractions the author sketched but never finished (not wired in yet):
#   product names:   //div[@id='plist']/ul/li/div[@class='lh-wrap']/div[@class='p-name']/a/text()
#   picture URLs:    //div[@class='lh-wrap']/div[@class='p-img']/a/img
#                    (values()[3] of each element)
#   prices (looser): //div[@class='p-price']/strong
#   review count:    //div[@class='extra']/a/text()
#                    parsed with r'.{2}(\d+).{3}'
#   positive rate:   //div[@class='extra']/span[@class='reputation']/text()
#                    parsed with r'\((\d{2})%.{2}\)'


def build_page_url(page):
    """Return the search URL for result page *page*.

    *page* is substituted into the trailing ``page=%s`` query parameter.
    """
    return MAIN_URL % page


def fetch_html(url):
    """Download *url* and return the raw response body.

    The response object is closed even if reading raises, so the underlying
    connection is not leaked.  Errors raised by ``urllib2.urlopen`` propagate
    to the caller unchanged.
    """
    # Local import: the file-level import block is left untouched.
    from contextlib import closing

    with closing(urllib2.urlopen(url)) as response:
        return response.read()


def iter_prices(html):
    """Yield the second attribute value of every price element in *html*.

    Values are produced lazily, so results found before a malformed element
    are still delivered.  Raises ``IndexError`` for an element with fewer than
    two attributes.
    """
    tree = etree.HTML(html)
    for node in tree.xpath(PRICE_XPATH):
        # NOTE(review): the attribute is picked by position, not by name;
        # presumably it is the one carrying the price -- confirm against the
        # live markup and switch to node.get(<name>) once known.
        yield node.values()[1]


def main(page_num=1):
    """Fetch *page_num* result pages and print their numbered price values.

    Each line is ``<value>\\t<index>`` followed by a blank line; the index
    restarts at 1 on every page.  Prints ``over`` when all pages are done.
    """
    # NOTE(review): range() starts at 0, so the first request uses page=0 --
    # verify whether the site numbers its result pages from 0 or from 1.
    for page in range(page_num):
        html = fetch_html(build_page_url(page))
        for index, price in enumerate(iter_prices(html), 1):
            # Single-argument print(...) keeps the Python 2 output identical.
            print(price + '\t' + str(index) + '\n')

    print("over")


if __name__ == '__main__':
    main()


这段代码还没有写完,先保存在这里,有时间再完成。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: