您的位置:首页 > 编程语言 > Python开发

参考教程,练习BeautifulSoup实例

2016-03-21 14:08 239 查看
<pre name="code" class="python">#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup
import requests
import re
import threading
import queue

# Site root; the site-relative paths scraped from the index page are appended to it.
root_url = 'http://pyvideo.org'
# Listing page whose thumbnail links are collected (the 'pycon-us-2014'
# category, judging by the URL).
index_url = root_url + '/category/50/pycon-us-2014'
# Work queue of video page paths: filled by main(), drained by the Mythread workers.
q = queue.Queue()
# Metadata dicts appended by the workers, one per processed video page.
result = []

def get_video_page_urls(timeout=10):
    """Return the link targets of all video thumbnails on the index page.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        A list of ``href`` values taken from every ``<a class="thumbnail">``
        element on ``index_url`` that actually carries an ``href``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    response = requests.get(index_url, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing the error page and
    # silently returning an empty list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")
    # href=True skips anchors without a target, so callers never receive None
    # and can safely concatenate each entry onto the site root.
    return [a.get('href') for a in soup('a', class_='thumbnail', href=True)]

def get_video_msg(video_url, timeout=10):
    """Fetch one video page and extract the metadata shown in its sidebar.

    Args:
        video_url: Site-relative path of the video page; it is appended to
            ``root_url`` to form the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with the keys ``'Category'``, ``'Speakers'``, ``'Language'``,
        ``'Recorded'`` and ``'Video origin'``. ``'Speakers'`` and
        ``'Video origin'`` are set to ``'Unknown'`` when the page has no
        matching element.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        AttributeError: If the sidebar, or the category, language or
            recording-date element inside it, is missing from the page.
    """
    response = requests.get(root_url + video_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    tag = soup.find(id='sidebar')

    video_data = {}
    video_data['Category'] = tag.find('a', href=re.compile('category')).string

    # Speakers are optional. Test for the missing element explicitly rather
    # than using a bare ``except``, which would also hide unrelated errors.
    author = tag.find('meta', property='author')
    video_data['Speakers'] = (
        author.get('content') if author is not None else 'Unknown'
    )

    # The visible text sits immediately before the corresponding <meta> tag.
    video_data['Language'] = tag.find(
        'meta', property='inLanguage').previous_element.strip()
    video_data['Recorded'] = tag.find(
        'meta', property='dateCreated').previous_element.strip()

    # The link to the original video is optional as well.
    origin = tag.find('a', property='embedUrl')
    video_data['Video origin'] = (
        origin.get('href') if origin is not None else 'Unknown'
    )
    return video_data

def show_video_stats():
    """Sequentially scrape every video listed on the index page and print its metadata."""
    for page_path in get_video_page_urls():
        video_info = get_video_msg(page_path)
        print(video_info)

class Mythread(threading.Thread):
    """Worker thread that drains the shared queue ``q`` into ``result``.

    Each worker repeatedly takes a video path from ``q``, scrapes it with
    ``get_video_msg`` and appends the returned dict to ``result``. The
    worker exits once the queue is empty.
    """

    def __init__(self, name):
        """Create the worker.

        Args:
            name: Label for this worker; it is printed before each item is
                processed.
        """
        super().__init__()
        self.name = name

    def run(self):
        """Process queued video paths until none are left."""
        while True:
            # Take the next item without blocking. Checking q.empty() first and
            # then calling a blocking q.get() is racy: another worker can grab
            # the last item in between, leaving this thread waiting forever.
            try:
                url = q.get_nowait()
            except queue.Empty:
                break
            print('thread:', self.name)
            try:
                result.append(get_video_msg(url))
            except Exception as exc:
                # Worker boundary: report the failure and keep serving the
                # queue. If workers died on errors, the remaining items might
                # never be processed and q.join() would never return.
                print('thread:', self.name, 'failed:', url, repr(exc))
            finally:
                # Always acknowledge the item so q.join() can complete.
                q.task_done()

# get_video_msg('/video/2668/writing-restful-web-services-with-flask')
# show_video_stats()

def main():
    """Scrape all listed video pages with a pool of worker threads and print the results.

    The page paths from the index page are put on the shared queue ``q``,
    eight ``Mythread`` workers are started to process them, and once every
    queued item has been acknowledged the collected dicts in ``result`` are
    printed one per line.
    """
    for video_url in get_video_page_urls():
        q.put(video_url)
    # Workers are started only after the queue is fully populated, because a
    # worker exits as soon as it finds the queue empty.
    for i in range(8):
        Mythread(i).start()
    # Block until every queued item has been marked done by a worker.
    q.join()
    for msg in result:
        print(msg)


# Run the scraper only when executed as a script, so importing this module
# does not trigger network requests or start threads.
if __name__ == '__main__':
    main()



                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python