您的位置：首页 > 编程语言 > Python开发

Python爬虫中selenium的使用

2019-03-31 14:41 393 查看

selenium的使用

selenium是一个支持各种浏览器的自动化测试工具

1.快速使用

pip install selenium

2.下载selenium驱动放在python的script文件目录下

地址:https://sites.google.com/a/chromium.org/chromedriver/

使用:

from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys

if __name__ == '__main__':
    # Quick-start demo: open Baidu, search for "python", print page info.
    # Launch a Chrome browser session
    browser = webdriver.Chrome()
    # Set the position and size of the browser window
    browser.set_window_position(x=50, y=60)
    browser.set_window_size(width=800, height=500)
    # Send a GET request
    browser.get('http://www.baidu.com')
    # Print the page title
    print(browser.title)
    # Find the element with id "kw" on the loaded page and type 'python' into it
    time.sleep(1)
    input_ele = browser.find_element_by_id('kw')
    input_ele.send_keys('python')
    # Find the button and click it
    time.sleep(1)
    button = browser.find_element_by_id('su')
    button.click()
    print(browser.title)
    # Print the page source
    print(browser.page_source)
    # Print the URL of the current page
    now_url = browser.current_url
    print(now_url)

获取到的元素对象的一些常用操作:

element.size #返回元素的尺寸
element.text #获取元素的文本
element.get_attribute(name) #获得属性值
element.is_displayed() #检查该元素是否用户可见

2.页面操作

<input type = 'text' name = 'password' id='password-id'/>

获取input元素,向input元素中插入数据

input_ele = browser.find_element_by_id('password-id')
input_ele = browser.find_element_by_name('password')
input_ele = browser.find_element_by_tag_name('input')
input_ele = browser.find_element_by_xpath('//input[@id="password-id"]')

注意:在使用xpath的时候,如果获取的是多个元素,那么他只会返回第一个元素,如若没有获取到,那么会抛出NoSuchElementException

输入数据

input_ele.send_keys("输入的数据",Keys.ENTER)

Keys这个类具有模拟键盘的操作,Keys.ENTER就是在输入完数据之后,模拟键盘回车

清除数据

input_ele.clear()

下拉选项卡的处理

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

if __name__ == '__main__':
    # Demo: list the value of every <option> inside a <select> element.
    browser = webdriver.Chrome()
    browser.get('https://puregrips.com/')
    # Locate the <select> element
    ele_select = browser.find_element_by_id("searchType")
    # find_elements (plural) returns every <option> tag inside the select
    all_options = ele_select.find_elements_by_tag_name('option')
    for option in all_options:
        # Print the value attribute of each option
        print('选项的值为%s' % option.get_attribute("value"))

selenium中自带的下拉选框处理类select

from selenium.webdriver.support.ui import Select

# Demo of the Select helper class. The index and visible-text arguments
# below are placeholders; replace them with values that exist on the page.
browser = webdriver.Chrome()
browser.get('https://puregrips.com/')
# Wrap the <select> element in the Select helper class
select = Select(browser.find_element_by_id("searchType"))
# Select the option whose value attribute is "product"
select.select_by_value('product')
# Select an option by its zero-based index
select.select_by_index(0)
# Select an option by the text shown to the user
select.select_by_visible_text('option text')

# NOTE: the deselect_* methods only work on a multi-select
# (<select multiple>); on a single select they raise NotImplementedError.
# Deselect everything
select.deselect_all()
# Deselect by index
select.deselect_by_index(0)
# Deselect by value
select.deselect_by_value('product')
# Deselect by visible text
select.deselect_by_visible_text('option text')

# All currently selected options
all_selected_options = select.all_selected_options

# All available options
options = select.options

表单提交

# 通过选中提交按钮,给提交按钮单击事件
button = browser.find_element_by_id('submit')
button.click()

鼠标事件

context_click() #右击
double_click() #双击
drag_and_drop() #拖动
move_to_element() #鼠标悬停在一个元素上
click_and_hold() #按下鼠标左键在一个元素上

执行事件

from selenium.webdriver.common.action_chains import ActionChains
...
# Locate the element that should be right-clicked
target = browser.find_element_by_xpath("xx")
# Create the action chain bound to this browser
chain = ActionChains(browser)
# Queue a right-click on the element ...
chain.context_click(target)
# ... and run it: perform() executes the actions stored in the chain
chain.perform()

鼠标移动到某个元素上

#引入 ActionChains 类
from selenium.webdriver.common.action_chains import ActionChains
...
# Locate the element the mouse should hover over
hover_target = driver.find_element_by_xpath("xxx")
# Queue a "move the mouse onto this element" action, then execute it
hover = ActionChains(driver).move_to_element(hover_target)
hover.perform()

键盘事件

from selenium.webdriver.common.keys import Keys

send_keys(Keys.BACK_SPACE) 删除键（BackSpace）
send_keys(Keys.SPACE) 空格键(Space)
send_keys(Keys.TAB) 制表键(Tab)
send_keys(Keys.ESCAPE) 回退键（Esc）
send_keys(Keys.ENTER) 回车键（Enter）
send_keys(Keys.CONTROL,'a') 全选（Ctrl+A）
send_keys(Keys.CONTROL,'c') 复制（Ctrl+C）
send_keys(Keys.CONTROL,'x') 剪切（Ctrl+X）
send_keys(Keys.CONTROL,'v') 粘贴（Ctrl+V）

滑块

if __name__ == '__main__':
    # Demo: drag a slider to the right in small steps until it stops moving.
    from selenium import webdriver
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver import ActionChains
    import time

    browser = webdriver.Chrome()
    try:
        browser.get('http://www.bootcss.com/p/metro-ui-css/slider.html')
        # Locate the slider handle that will be dragged
        huakuai = browser.find_element_by_xpath("//div[@class='slider'][1]/div[@class='marker']")
        # Create the action chain bound to this browser
        action = ActionChains(browser)
        # Press and hold the left mouse button on the handle
        action.click_and_hold(huakuai).perform()
        for _ in range(200):
            try:
                # Move 2px to the right; once the handle is at the end the
                # driver raises an error, which ends the loop
                action.move_by_offset(2, 0).perform()
            except WebDriverException:
                break
            # Clear the stored actions before queuing the next move
            action.reset_actions()
            # Pause between moves
            time.sleep(0.1)
    finally:
        # Always close the browser, even if a step above failed
        browser.quit()

页面切换

# Handles of all open windows
# NOTE(review): the original note says the order is newest to oldest;
# Selenium does not appear to guarantee any order - confirm before relying on it
handles = browser.window_handles
# Handle of the window that currently has focus
current_handle = browser.current_window_handle
for handle in handles:
    if handle != current_handle:
        print('要切换窗口到', handle)
        # Close the window that currently has focus ...
        browser.close()
        # ... then move focus to the other window
        browser.switch_to.window(handle)

弹窗处理 (默认弹窗) (自定义美化的弹窗,电商广告)

# Option 1: native browser alert

# Switch to the alert that is currently open
al = browser.switch_to.alert
# The calls below are alternatives, not a sequence: once an alert has been
# accepted or dismissed it is gone. Pick the one that fits.
# Click the confirm (OK) button
al.accept()
# Click the cancel button
al.dismiss()
# Type into the alert if it asks for input
al.send_keys("xxx")
# Text shown by the alert
al.text

# 二   需要结合js
from selenium import webdriver
import time

browser = webdriver.Firefox()
browser.get("http://sh.xsjedu.org/")
time.sleep(1)
# Hide the custom pop-up: JavaScript looks the element up by id and sets
# its CSS display property to "none"
hide_popup_js = 'document.getElementById("doyoo_monitor").style.display="none";'
browser.execute_script(hide_popup_js)

Cookie

添加cookie

browser.get('http://www.baidu.com')
# add_cookie() takes ONE cookie per call, as a dict that must contain the
# keys 'name' and 'value' (other keys such as 'path' or 'domain' are optional)
cookies = [
    {'name': 'name', 'value': '张三'},
    {'name': 'age', 'value': '40'},
]
for cookie in cookies:
    browser.add_cookie(cookie)

获取当前页面的cookie

browser.get('http://www.baidu.com')

# List of cookie dicts for the current page
current_cookie = browser.get_cookies()

# Turn the cookie list into "name=value" pairs so it can be used with the
# requests module
cookie_str_list = [
    cookie_item['name'] + '=' + cookie_item['value']
    for cookie_item in current_cookie
]

# The final cookie string: "name1=value1; name2=value2"
cookie = '; '.join(cookie_str_list)

控制滚动条

<!--滚动条的属性-->
<body  "document.body.scrollTop=0 ">
<body  "document.body.scrollTop=100000 ">

只要通过js来控制滚动在最底部就可以了 ------------------ 存在问题

# Scroll to the bottom of the page. The page's real height is used instead
# of a fixed offset, so pages taller than a guessed value still reach the end.
js = "window.scrollTo(0, document.body.scrollHeight)"
driver.execute_script(js)
time.sleep(3)
# Scroll back to the top of the page
js1 = "window.scrollTo(0, 0)"
driver.execute_script(js1)
time.sleep(3)
复制代码

3.总结元素选择

定位的选择支持层级操作(链式操作)

单元素选择

find_element_by_id
find_element_by_name
find_element_by_xpath
find_element_by_link_text
find_element_by_partial_link_text
find_element_by_tag_name
find_element_by_class_name
find_element_by_css_selector

多元素选择

find_elements_by_name
find_elements_by_xpath
find_elements_by_link_text
find_elements_by_partial_link_text
find_elements_by_tag_name
find_elements_by_class_name
find_elements_by_css_selector

使用By类来选元素

from selenium.webdriver.common.by import By

driver.find_element(By.XPATH, '//button[text()="Some text"]')
driver.find_elements(By.XPATH, '//button')

'''
By类的属性
ID = "id"
XPATH = "xpath"
LINK_TEXT = "link text"
PARTIAL_LINK_TEXT = "partial link text"
NAME = "name"
TAG_NAME = "tag name"
CLASS_NAME = "class name"
CSS_SELECTOR = "css selector"
'''

4.元素等待

ajax技术元素的缓加载影响selenium的判断

容易引起错误ElementNotVisibleException

等待元素加载来减少错误

隐式等待–等待特定的时间默认时间0ms
显式等待–指定某一条件直到这个条件成立时继续执行默认时间500ms

显式等待

显式等待指定某个条件，然后设置最长等待时间。如果在这个时间还没有找到元素，那么便会抛出异常了

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome()
browser.get("http://somedomain/url_that_delays_loading")
try:
    # Wait up to 10 seconds for an element with id "myDynamicElement" to be
    # present and return it; if it has not appeared by then, until() raises
    element = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.ID, "myDynamicElement"))
    )
finally:
    # Close the browser whether or not the wait succeeded
    browser.quit()

'''
EC常用的等待条件:
title_is   标题是?
title_contains  标题有?
presence_of_element_located  元素存在
visibility_of_element_located  定位元素的可见性
visibility_of
presence_of_all_elements_located
text_to_be_present_in_element
text_to_be_present_in_element_value
frame_to_be_available_and_switch_to_it
invisibility_of_element_located
element_to_be_clickable – it is Displayed and Enabled.
staleness_of
element_to_be_selected
element_located_to_be_selected
element_selection_state_to_be
element_located_selection_state_to_be
alert_is_present
'''

# 显式等待的第二个方式    wait和等待条件EC分开写
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Create the wait object once (10 second timeout); it can be reused with
# different conditions
wait = WebDriverWait(driver, 10)
# Block until the element with id "someid" is clickable, then return it
element = wait.until(EC.element_to_be_clickable((By.ID, 'someid')))

隐式等待

from selenium import webdriver

browser = webdriver.Chrome()
# Implicit wait: element lookups on this browser wait up to this many seconds
wait_seconds = 10
browser.implicitly_wait(wait_seconds)
browser.get("http://somedomain/url_that_delays_loading")
# This lookup is covered by the implicit wait set above
myDynamicElement = browser.find_element_by_id("myDynamicElement")

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航