Scrapy 自定义代理中间件

2018-01-13  本文已影响0人  lkning

系统自带

import base64
import random

# Start your middleware class

class ProxyMiddleware(object):
    """Downloader middleware that sends every request through one fixed proxy.

    Override the class attributes below (or subclass) to configure the proxy.
    """

    # Location of the proxy, including the scheme.
    proxy_url = "http://YOUR_PROXY_IP:PORT"

    # "user:password" credentials for the proxy; leave empty when the proxy
    # does not require authentication.
    proxy_user_pass = "USERNAME:PASSWORD"

    def process_request(self, request, spider):
        """Attach the proxy location and, if configured, Basic credentials.

        Args:
            request: The outgoing request; its ``meta`` and ``headers``
                mappings are modified in place.
            spider: The spider that issued the request (unused).

        Returns:
            None.
        """
        request.meta['proxy'] = self.proxy_url

        if self.proxy_user_pass:
            # b64encode (unlike the removed encodestring / encodebytes) does
            # not append a trailing newline, which would corrupt the header.
            encoded_user_pass = base64.b64encode(
                self.proxy_user_pass.encode('utf-8')
            ).decode('ascii')
            request.headers['Proxy-Authorization'] = 'Basic ' + encoded_user_pass

自定义（从代理列表中随机选取一个代理）

class ProxyMiddleware(object):
    """Downloader middleware that picks a random proxy for every request."""

    # Pool of candidate proxies. 'user_pass' is "user:password", or an empty
    # string when the proxy needs no authentication.
    PROXIES = [
        {'ip_port': '111.11.228.75:80', 'user_pass': ''},
        {'ip_port': '120.198.243.22:80', 'user_pass': ''},
        {'ip_port': '111.8.60.9:8123', 'user_pass': ''},
        {'ip_port': '101.71.27.120:80', 'user_pass': ''},
        {'ip_port': '122.96.59.104:80', 'user_pass': ''},
        {'ip_port': '122.224.249.122:8088', 'user_pass': ''},
    ]

    def process_request(self, request, spider):
        """Route the request through a randomly chosen entry of ``PROXIES``.

        Args:
            request: The outgoing request; its ``meta`` and ``headers``
                mappings are modified in place.
            spider: The spider that issued the request (unused).

        Returns:
            None.
        """
        proxy = random.choice(self.PROXIES)
        request.meta['proxy'] = "http://%s" % proxy['ip_port']

        # Truthiness check: an empty string means "no credentials", so no
        # Proxy-Authorization header must be sent in that case.
        user_pass = proxy.get('user_pass')
        if user_pass:
            # b64encode yields no trailing newline (encodebytes does) and the
            # result is decoded so it can be joined with the 'Basic ' prefix.
            encoded_user_pass = base64.b64encode(
                user_pass.encode('utf-8')
            ).decode('ascii')
            request.headers['Proxy-Authorization'] = 'Basic ' + encoded_user_pass
上一篇 下一篇

猜你喜欢

热点阅读