爬虫

如何验证西刺代理,选出好用的

2016-12-16  本文已影响0人  木马音响积木

注意是window 环境下 ,程序运行完毕后,选出来的这些地址,都是不好用的,
请高手帮助,指导如何选出好用的地址,
我的代码有什么问题,如果您能指出,感谢,
100个的ip 是从西刺代理api 直接取出来的,感谢西刺代理, 我们同时也希望每个地址都是好用的.

106.91.35.28:8998
121.31.48.6:8123
203.115.102.146:8080
183.144.40.128:8998
106.91.21.92:8998
95.173.179.54:1881
171.38.207.76:8123
183.66.93.105:8998
80.91.188.46:3128
115.201.149.59:8998
190.248.134.246:8080
60.250.81.97:80


#!C:\Python35\python.exe
# coding=utf-8
# encoding=utf8
from bs4 import BeautifulSoup
import urllib
import requests
import socket
import traceback
import sys
import lxml

# User_Agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
# header = {}
# header['User-Agent'] = User_Agent
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
}

#获取所有代理IP地址

def getProxyIp():
    proxy = []
    for i in range(1, 66):
        try:
            url = 'http://www.xicidaili.com/nn/' + str(i)
            # url = 'http://www.xicidaili.com/nn/66'
            req = requests.get(url, headers = headers)
            soup = BeautifulSoup(req.text ,'lxml')
            print(soup)
            print(i)
            ips = soup.findAll('tr')
            for x in range(1, len(ips)):
                ip = ips[x]
                tds = ip.findAll("td")
                ip_temp = tds[1].contents[0] + ":" + tds[2].contents[0]
                proxy.append(ip_temp)
        except:
            continue
    return proxy
    #print(proxy)


# 验证获得的代理IP地址是否可用


def validateIp(proxy):

    url = "http://ip.chinaz.com/getip.aspx"
    f = open("d:\ip.txt", "w")
    socket.setdefaulttimeout(3)
    print(len(proxy))
    #for i in range(0,99):
        #kk=proxy.split()[i]
    for i in range(0, len(proxy)):
        try:
            #ip = proxy[i].strip().split("\t")
            #print(ip)
            #proxy_host = "http://" + ip[0] + ":" + ip[1]
            proxy_host = proxy.split()[i]
            proxy_temp = {"http": proxy_host}
            print(proxy_temp)
            #res = urllib.urlopen(url, proxies = proxy_temp).read()
            wb_data = requests.get(url, headers=headers, proxies=proxy_temp)
            soup = BeautifulSoup(wb_data.text, 'lxml')
            print(soup)
            f.write(proxy_host + '\n')
            print(proxy[i])
        except:
        #except Exception, e:
            continue
    f.close()

proxylist ='''
183.144.36.48:8998
114.104.51.245:8998
106.91.35.28:8998
222.188.88.10:8998
119.53.129.180:8118
183.66.84.249:8998
106.91.34.54:8998
27.18.130.134:8998
183.144.51.19:8998
113.250.102.245:8998
113.251.175.198:8998
180.251.72.9:8080
115.225.197.22:8998
183.144.35.127:8998
113.251.176.62:8998
113.251.158.228:8998
58.217.184.48:8998
121.31.48.6:8123
183.66.75.28:8998
183.66.82.7:8998
203.115.102.146:8080
183.144.52.196:8998
58.217.79.117:8998
122.244.7.127:8998
183.144.46.154:8998
114.106.179.118:8998
114.106.86.70:8998
180.242.113.29:8080
106.91.114.21:8998
123.97.16.94:8998
111.79.244.182:8998
119.85.176.197:8998
106.91.42.81:8998
36.56.231.53:8998
171.38.197.200:8123
183.144.39.7:8998
183.144.40.128:8998
109.224.39.75:8080
183.66.74.170:8998
106.91.21.92:8998
180.136.105.220:8998
183.140.84.115:3128
95.173.179.54:1881
121.31.139.113:8123
115.225.112.233:8998
115.225.70.205:8998
106.91.17.203:8998
219.223.42.160:8998
121.31.177.236:8123
106.91.45.37:8998
171.38.207.76:8123
27.18.184.241:8998
183.66.93.105:8998
171.13.58.98:8998
210.101.131.231:8080
183.66.82.193:8998
106.91.43.131:8998
125.85.183.9:8998
115.200.118.163:8998
121.61.96.7:8118
36.68.243.120:8080
120.1.34.49:8118
80.91.188.46:3128
114.106.46.229:8998
106.91.31.146:8998
122.244.193.210:8998
114.106.190.50:8998
123.97.8.121:8998
125.126.126.244:8998
36.68.247.17:8080
183.66.73.61:8998
106.91.33.69:8998
183.185.25.227:9797
106.91.20.225:8998
119.129.116.112:9797
106.91.43.6:8998
118.123.45.228:8998
123.97.21.185:8998
183.66.53.186:8998
183.66.91.143:8998
183.140.82.77:3128
125.108.166.42:8998
183.66.91.123:8998
106.91.30.87:8998
183.144.196.163:8998
116.225.250.226:63000
115.201.149.59:8998
111.78.128.25:8998
190.248.134.246:8080
27.21.205.246:8998
60.250.81.97:80
183.66.90.123:8998
183.144.42.167:8998
123.97.19.95:8998
106.91.24.148:8998
117.65.107.135:8998
182.88.205.48:8123
183.140.86.97:3128
114.106.206.157:8998
83.68.39.26:3128
'''

if __name__ == '__main__':

    #proxy = getProxyIp()
    #kk=proxylist.split()
        #print(kk)
    validateIp(proxylist)

上一篇下一篇

猜你喜欢

热点阅读