【自动化】设置代理IP

2019-05-16  本文已影响0人  创造new_world

Firefox中设置代理ip

method_1

from selenium import webdriver

# Firefox preferences that route HTTP traffic through a proxy listening on
# 127.0.0.1:17890.
proxy_preferences = [
    ('network.proxy.type', 1),
    ('network.proxy.http', '127.0.0.1'),
    ('network.proxy.http_port', 17890),  # the port must be an int
]

profile = webdriver.FirefoxProfile()
for pref_name, pref_value in proxy_preferences:
    profile.set_preference(pref_name, pref_value)
profile.update_preferences()

# Start Firefox with the customised profile and open a page that reports the
# IP address the request came from.
driver = webdriver.Firefox(firefox_profile=profile)
driver.get('http://httpbin.org/ip')

method_2

from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import ProxyType

# Manual proxy settings. get_proxy_ip_port() is not defined in this snippet;
# presumably it returns an 'ip:port' string -- confirm against its definition.
proxy_settings = {
    'proxyType': ProxyType.MANUAL,
    'httpProxy': get_proxy_ip_port(),
}
proxy = Proxy(proxy_settings)

# Hand the proxy straight to the Firefox driver, then open the test page.
driver = webdriver.Firefox(proxy=proxy)
driver.get('http://httpbin.org/ip')

phantomjs中设置代理ip

方法一:太不优雅(还是看方法二吧)

在phantomjs中不能像上面Firefox的method_2那样直接传入proxy参数。
PhantomJS和Firefox的驱动类均继承自WebDriver,父类WebDriver的初始化方法可以传入proxy,
但PhantomJS类在初始化父类WebDriver时没有留出proxy参数。
所以可以像下图一样改一下phantomjs类的源码,这样就可以在phantomjs中传入proxy参数了

# 注意授权
# Licensed to the Software Freedom Conservancy (SFC) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The SFC licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

[图片上传失败...(image-494c7f-1532569471107)]

下面才是示例
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import ProxyType

# Manual proxy settings. get_proxy_ip_port() is not defined in this snippet;
# presumably it returns an 'ip:port' string -- confirm against its definition.
proxy = Proxy(
    {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': get_proxy_ip_port(),
    }
)
# NOTE: the ``proxy`` keyword is only accepted after the PhantomJS driver
# class has been patched as described in the text above.
driver = webdriver.PhantomJS(
    executable_path="/path/of/phantomjs",
    proxy=proxy,
)
try:
    driver.get('http://httpbin.org/ip')
    # Call form of print so the snippet is valid on Python 3 as well as 2.
    print(driver.page_source)
finally:
    # quit() ends the session and stops the phantomjs process even if the
    # request fails; close() would only close the current window.
    driver.quit()
方法二:
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import ProxyType
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

proxy = Proxy(
    {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': 'ip:port',  # placeholder: the proxy's IP and port
    }
)
# Start from a copy of the stock PhantomJS capabilities so the shared
# DesiredCapabilities.PHANTOMJS dict is not modified.
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
# Add the proxy settings to the capabilities.
proxy.add_to_capabilities(desired_capabilities)
driver = webdriver.PhantomJS(
    executable_path="/path/of/phantomjs",
    desired_capabilities=desired_capabilities,
)
try:
    driver.get('http://httpbin.org/ip')
    # Call form of print so the snippet is valid on Python 3 as well as 2.
    print(driver.page_source)
finally:
    # quit() ends the session and stops the phantomjs process even if the
    # request fails; close() would only close the current window.
    driver.quit()
方法三(动态切换ip):
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import ProxyType
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

proxy = Proxy(
    {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': 'ip:port',  # placeholder: the first proxy's IP and port
    }
)
# Start from a copy of the stock PhantomJS capabilities so the shared
# DesiredCapabilities.PHANTOMJS dict is not modified.
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
# Add the proxy settings to the capabilities.
proxy.add_to_capabilities(desired_capabilities)
driver = webdriver.PhantomJS(
    executable_path="/path/of/phantomjs",
    desired_capabilities=desired_capabilities,
)
try:
    # Check the IP reported through the first proxy.
    driver.get('http://httpbin.org/ip')
    # Call form of print so the snippet is valid on Python 3 as well as 2.
    print(driver.page_source)

    # Now switch to another proxy: describe the new proxy ...
    proxy = Proxy(
        {
            'proxyType': ProxyType.MANUAL,
            'httpProxy': 'ip:port',  # placeholder: the new proxy's IP and port
        }
    )
    # ... build a fresh set of capabilities for it ...
    desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
    # ... add the new proxy settings to those capabilities ...
    proxy.add_to_capabilities(desired_capabilities)
    # ... and start a new session on the same driver with them.
    driver.start_session(desired_capabilities)
    driver.get('http://httpbin.org/ip')
    print(driver.page_source)
finally:
    # quit() ends the session and stops the phantomjs process even if a
    # request fails; close() would only close the current window.
    driver.quit()

1. 背景

在使用selenium浏览器渲染技术,爬取网站信息时,一般来说,速度是很慢的。而且一般需要用到这种技术爬取的网站,反爬技术都比较厉害,对IP的访问频率应该有相当的限制。所以,如果想提升selenium抓取数据的速度,可以从两个方面出发:

2. 为chrome设置代理IP

from selenium import webdriver
# Command-line flag that sets the proxy. There must be no spaces around "=",
# i.e. it must not look like: --proxy-server = http://202.20.16.82:10152
proxy_argument = "--proxy-server=http://202.20.16.82:10152"

chromeOptions = webdriver.ChromeOptions()
chromeOptions.add_argument(proxy_argument)
driver = webdriver.Chrome(r"D:\chromedriver.exe", chrome_options=chromeOptions)

# Fetch the page that reports the requesting IP to check whether the proxy
# is in effect.
driver.get("http://httpbin.org/ip")
page_html = driver.page_source
print(page_html)

# Shut the browser down (the original note says this also clears the
# browser cache).
driver.quit()

3. 需要用户名密码验证的代理

上一篇下一篇

猜你喜欢

热点阅读