# python爬虫之selenium使用案例教程
#
# 2018-12-30 本文已影响0人 Pickupthesmokes
# selenium:是一个web的自动化测试工具,可以直接运行在浏览器上,
# 但是并不自带浏览器,需要有浏览器驱动,selenium可以根据我们的代码指令
# 让浏览器自动加载页面,这时得到的页面源码是经过浏览器渲染之后的,
# 然后我们就可以在页面源码中寻找节点(动态加载的网页,模拟登录)
#pip3 install selenium
from selenium import webdriver
import time
#加载页面
# driver = webdriver.Firefox(
#     executable_path='/home/ljh/桌面/driver/geckodriver'
# )
# #使用get方法打开页面
# driver.get('https://www.baidu.com/')
#加载页面(PhantomJS,无头浏览器)
#warnings.warn('Selenium support for PhantomJS
# has been deprecated, please use headless '
#目前推荐使用谷歌的无头浏览器
# driver = webdriver.PhantomJS(
#     executable_path='/home/ljh/桌面/driver/phantomjs'
# )
# driver.get('https://www.baidu.com/')
#
# driver.save_screenshot('baidu.png')
# 加载页面(使用谷歌的浏览器驱动)
#设置为无头浏览器
# opt = webdriver.ChromeOptions()
# opt.set_headless()
# driver = webdriver.Chrome(
#     options=opt,
#     executable_path='/home/ljh/桌面/driver/chromedriver'
# )
# Start a regular (non-headless) Chrome session through the local chromedriver binary.
chromedriver_path = '/home/ljh/桌面/driver/chromedriver'
driver = webdriver.Chrome(executable_path=chromedriver_path)
# Import the selenium exception classes used for error handling.
from selenium.common import exceptions

# Limit how long a page load may take before selenium raises TimeoutException.
driver.set_page_load_timeout(10)
try:
    driver.get('https://www.baidu.com/')
except exceptions.TimeoutException as timeout_err:
    # Report the timeout; the script then continues.
    print(timeout_err, '请求超时')
# Information that can be read from the driver:
# the page source as rendered by the browser, saved to disk here.
page_html = driver.page_source
# Write with an explicit UTF-8 encoding: the page contains Chinese text, and the
# platform default encoding (e.g. GBK or cp1252 on Windows) may fail to encode it.
with open('baidu.html', 'w', encoding='utf-8') as html_file:
    html_file.write(page_html)
#获取cookies信息
"""
[
{'domain': 
'.baidu.com', 
'httpOnly': False, 
'path': '/', 
'secure': False, 
'value': '1431_21080_28206_28131_27750_28139_27509', 
'name': 'H_PS_PSSID'}, 
{'domain': '.baidu.com', 'httpOnly': False, 'path': '/', 'expiry':     3693275324.184597, 'secure': False, 'value':     '8C1C72599F01E693A201BA4B33C6DFE0', 'name': 'BIDUPSID'}, {'domain': '.baidu.com', 'httpOnly': False, 'path': '/', 'secure': False, 'value': '0', 'name': 'delPer'}, {'domain': '.baidu.com', 'httpOnly': False, 'path': '/', 'expiry': 3693275324.184649, 'secure': False, 'value': '1545791676', 'name': 'PSTM'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'path': '/', 'expiry': 1546655678, 'secure': False, 'value': '123353', 'name': 'BD_UPN'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'path': '/', 'secure': False, 'value': '0', 'name': 'BD_HOME'}, {'domain': '.baidu.com', 'httpOnly': False, 'path': '/', 'expiry': 3693275324.18448, 'secure': False, 'value': '8C1C72599F01E693A201BA4B33C6DFE0:FG=1', 'name': 'BAIDUID'}]
"""
  #获取所有的cookies值
# Fetch every cookie of the current session (a list of dicts, see the sample above).
cookies = driver.get_cookies()
# Look up a single cookie by name; the result is not kept.
driver.get_cookie('BD_UPN')
# Collapse the cookie list into a simple name -> value mapping.
cookies_dict = dict((item['name'], item['value']) for item in cookies)
print(cookies)
print(cookies_dict)
#删除cookie
# driver.delete_cookie('BD_UPN')
# #删除所有的cookies
# driver.delete_all_cookies()
# #添加cookies
# #cookie_dict(字典,存放的cookies信息)
# driver.add_cookie()
# Read the URL of the currently loaded page and the name of the browser in use,
# then print them one per line.
cur_url = driver.current_url
name = driver.name
print(cur_url, name, sep='\n')
#定位和操作节点(标签)
"""
driver.find_element_by_xpath():根据xpath路径定位标签(找单个)
driver.find_elements_by_xpath()根据xpath路径定位标签(找所有)
driver.find_element_by_css_selector():根据css选择器定位标签
driver.find_element_by_link_text():根据标签文本内容(完整)定位
driver.find_element_by_partial_link_text():根据标签文本内容(局部)定位
driver.find_element_by_id():根据id属性寻找节点
driver.find_element_by_class_name():根据class属性寻找节点
"""
# Locate the search input by id and type a query into it.
keyword_input = driver.find_element_by_id('kw')
keyword_input.send_keys('隔壁老王')
# Look the input up again and clear it.
keyword_input = driver.find_element_by_id('kw')
keyword_input.clear()
time.sleep(2)
# Type a different query.
keyword_input = driver.find_element_by_id('kw')
keyword_input.send_keys('隔壁老赵')
# Locate the submit button and simulate a click.
submit_button = driver.find_element_by_id('su')
submit_button.click()
# Save a screenshot of the browser window.
driver.save_screenshot('baiduyixia.png')
# #前进后退
# time.sleep(2)
# #后退
# driver.back()
#
# time.sleep(2)
# #前进
# driver.forward()
# Page waits.
# selenium, like a browser, needs time to load pages, especially dynamic ones;
# looking up a node before the page has loaded raises an exception, so some
# form of waiting is needed.
# Fixed wait: simply pause for a few seconds.
fixed_pause = 3
time.sleep(fixed_pause)
# Implicit wait: when a node being searched for has not appeared yet, selenium
# keeps looking for up to the given time.
implicit_timeout = 10
driver.implicitly_wait(implicit_timeout)
# Explicit wait: give a maximum wait time and continue as soon as a condition
# holds; if the condition is not met (node not found) within that time, an
# exception is raised.
# By selects the strategy used to look up a node.
from selenium.webdriver.common.by import By
# WebDriverWait carries the maximum wait time.
from selenium.webdriver.support.ui import WebDriverWait
# expected_conditions provides the conditions to wait for.
from selenium.webdriver.support import expected_conditions

# Wait up to 10 seconds for an element with class "n" to be present.
locator = (By.CLASS_NAME, 'n')
waiter = WebDriverWait(driver, 10)
a_element = waiter.until(
    expected_conditions.presence_of_element_located(locator)
)
print(a_element.text)
# Read the attributes and text of a node.
# .get_attribute('href') returns the value of an attribute; .text returns the text.
# When no node matches, selenium raises NoSuchElementException
# ("Message: no such element: Unable to locate element").
try:
    # Locate the link once and read both values from the same element.
    result_link = driver.find_element_by_xpath('//h3[@class="t"]/a')
    href = result_link.get_attribute('href')
    title = result_link.text
    print(href, title)
except exceptions.NoSuchElementException:
    print('没有找到节点')
# Hide every image on the page; execute_script runs a JavaScript statement in the page.
fade_out_js = '$(arguments[0]).fadeOut()'
imgs = driver.find_elements_by_xpath('//img')
for picture in imgs:
    driver.execute_script(fade_out_js, picture)
# Scroll down to the bottom of the page.
scroll_bottom_js = 'window.scrollTo(0,document.body.scrollHeight)'
driver.execute_script(scroll_bottom_js)
#关闭操作
#关闭当前所在的窗口
# driver.close()
# #退出浏览器
# driver.quit()
# python爬虫之selenium使用案例教程
#
# 猜你喜欢
#
# 热点阅读