Python

抓取经常访问的网站的最快响应IP

2018-05-10  本文已影响13人  十一岁的加重

代码:

#!/usr/bin/python
# coding=utf-8
import os
import re
import sys

import requests

# Lookup URLs are appended here by the scraping loop further down.
urls = []
# Collects the "<ip> <host>" lines that the script prints at the end.
outputString = ''

# Hostnames to look up; each one is turned into an ipaddress.com URL.
hosts = [
    'Github.com',
    'github.global.ssl.fastly.net',
    'weex.apache.org',
    'Cnblogs.com',
    'Jobbole.com',
    'aliyun.com',
    'Segmentfault.com',
    'Csdn.net',
    'Jianshu.com',
    'weex-project.io',
    'oschina.net',
]

# A single DNS table row: name, type, link target, link text, TTL.
# The scraping loop reads the link text (group index 3) as the IP.
regString4 = (
    '<tr><td>(.*?)</td><td>(.*?)</td>'
    '<td><a href="(.*?)">(.*?)</a></td>'
    '<td>(.*?)</td></tr>'
)

# The whole "DNS Resource Records" panel wrapped around exactly one row.
regString = ''.join((
    '<section class="panel2">',
    '<h2 class="panel-head">DNS Resource Records</h2>',
    '<div class="panel-body">',
    '<table class="table table-striped table-h table-dnsrr panel-item">',
    '<thead><tr><th>Name</th><th>Type</th><th>Data</th><th>TTL</th></tr></thead>',
    '<tbody id="dnsinfo">',
    regString4,
    '</tbody>',
    '</table>',
    '</div>',
    '</section>',
))

# Panel headings carrying a flag icon; the scraping loop reads the heading
# text (group index 1) as the IP.  Three heading variants are recognised:
# foldable, plain, and foldable with data-fold="true".
regString2 = '<h3 class="panel-head panel-folder"><span class="icon icon-flag-(.*?)"></span>(.*?)</h3>'
regString3 = '<h3 class="panel-head"><span class="icon icon-flag-(.*?)"></span>(.*?)</h3>'
regString5 = '<h3 class="panel-head panel-folder" data-fold="true"><span class="icon icon-flag-(.*?)"></span>(.*?)</h3>'

# Seconds to wait for the server before giving up on a host.  Without a
# timeout, requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _extract_ips(page):
    """Return at most two address strings scraped from one fetched page.

    The page layouts are tried in a fixed order (regString, regString2,
    regString3, regString4) and the first pattern that matches decides
    the result.  An empty list is returned when nothing matches.
    """
    # Layout 1: the full "DNS Resource Records" panel; the address is the
    # link text of the row (group index 3).
    rows = re.findall(regString, page, re.S)
    if rows:
        return [row[3] for row in rows[:2]]

    # Layout 2: foldable flag headings; the address is the heading text.
    headings = re.findall(regString2, page, re.S)
    if headings:
        found = [heading[1] for heading in headings[:2]]
        if len(found) == 1:
            # Only one plain foldable heading: a second address may sit in
            # a heading marked data-fold="true".
            folded = re.findall(regString5, page, re.S)
            if folded:
                found.append(folded[0][1])
        return found

    # Layout 3: non-foldable flag headings.
    headings = re.findall(regString3, page, re.S)
    if headings:
        return [heading[1] for heading in headings[:2]]

    # Layout 4: bare table rows anywhere in the page (may be empty).
    rows = re.findall(regString4, page, re.S)
    return [row[3] for row in rows[:2]]


hostLines = []
for host in hosts:
    url = 'http://' + host + '.ipaddress.com/#ipinfo'
    urls.append(url)
    try:
        html = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as err:
        # One unreachable host must not discard the results collected for
        # the others; report it on stderr and move on.
        sys.stderr.write('%s: lookup failed (%s)\n' % (host, err))
        continue
    for ip in _extract_ips(html.text):
        hostLines.append(ip + ' ' + host + '\n')

# Join once instead of re-concatenating the string for every line.
outputString += ''.join(hostLines)

# The parenthesised single-argument form prints the same text under both
# Python 2 and Python 3.
print(outputString)

效果:


image.png

意义:
这样就不用手动去查这些常访问网站的IP了,把脚本输出的结果直接放到hosts文件里即可。
刷新DNS缓存(macOS):

sudo dscacheutil -flushcache;sudo killall -HUP mDNSResponder;say flushed
上一篇 下一篇

猜你喜欢

热点阅读