抓取经常访问的网站的最快响应IP
2018-05-10 本文已影响13人
十一岁的加重
代码:
#!/usr/bin/python
# coding=utf-8
import os
import re
import sys

import requests
# Every lookup URL requested so far, in request order.
urls = []
# hosts-file text ("<ip> <host>" per line); filled in when run as a script.
outputString = ''
# Seconds to wait on one lookup before giving up on that host.
requestTimeout = 10
# Sites to look up. Each name is used both as the ipaddress.com sub-domain
# and as the host column of the generated entries.
hosts = [
    'Github.com',
    'github.global.ssl.fastly.net',
    'weex.apache.org',
    'Cnblogs.com',
    'Jobbole.com',
    'aliyun.com',
    'Segmentfault.com',
    'Csdn.net',
    'Jianshu.com',
    'weex-project.io',
    'oschina.net',
]

# Complete "DNS Resource Records" panel holding a table row; the link text of
# the Data column (tuple index 3) is used as the address.
regString = (
    r'<section class="panel2">'
    r'<h2 class="panel-head">DNS Resource Records</h2>'
    r'<div class="panel-body">'
    r'<table class="table table-striped table-h table-dnsrr panel-item">'
    r'<thead><tr><th>Name</th><th>Type</th><th>Data</th><th>TTL</th></tr></thead>'
    r'<tbody id="dnsinfo">'
    r'<tr><td>(.*?)</td><td>(.*?)</td><td><a href="(.*?)">(.*?)</a></td><td>(.*?)</td></tr>'
    r'</tbody>'
    r'</table>'
    r'</div>'
    r'</section>'
)
# Heading with the "panel-folder" class; the heading text (tuple index 1) is
# used as the address.
regString2 = r'<h3 class="panel-head panel-folder"><span class="icon icon-flag-(.*?)"></span>(.*?)</h3>'
# Heading without the "panel-folder" class; same capture layout as regString2.
regString3 = r'<h3 class="panel-head"><span class="icon icon-flag-(.*?)"></span>(.*?)</h3>'
# A bare table row anywhere in the page; same capture layout as regString.
regString4 = r'<tr><td>(.*?)</td><td>(.*?)</td><td><a href="(.*?)">(.*?)</a></td><td>(.*?)</td></tr>'
# "panel-folder" heading carrying data-fold="true"; only consulted when
# regString2 matched exactly once, to supply a second address.
regString5 = r'<h3 class="panel-head panel-folder" data-fold="true"><span class="icon icon-flag-(.*?)"></span>(.*?)</h3>'

# Patterns in the order they are tried. Each entry is
# (pattern, tuple index of the address, fall back to regString5 on a lone match).
lookupOrder = (
    (regString, 3, False),
    (regString2, 1, True),
    (regString3, 1, False),
    (regString4, 3, False),
)


def extractIps(pageText):
    """Return at most two address strings scraped from one lookup page.

    The patterns in ``lookupOrder`` are tried in order and the first one that
    matches decides the result: the addresses of its first two matches. When
    the winning pattern is ``regString2`` and it matched exactly once, the
    first ``regString5`` match (if any) is appended as the second address.
    An empty list is returned when no pattern matches.
    """
    for pattern, group, useFolded in lookupOrder:
        matches = re.findall(pattern, pageText, re.S)
        if not matches:
            continue
        found = [match[group] for match in matches[:2]]
        if useFolded and len(matches) == 1:
            folded = re.findall(regString5, pageText, re.S)
            if folded:
                found.append(folded[0][1])
        return found
    return []


def formatEntries(host, ips):
    """Return one "<ip> <host>" line per address, each newline-terminated."""
    return ''.join(ip + ' ' + host + '\n' for ip in ips)


def collectEntries(hostNames):
    """Fetch the lookup page of every host and return the hosts-file text.

    Each requested URL is appended to the module-level ``urls`` list. A host
    whose request fails or times out is reported on stderr and skipped, so a
    single unreachable site no longer discards the results already gathered.
    """
    chunks = []
    for host in hostNames:
        url = 'http://' + host + '.ipaddress.com/#ipinfo'
        urls.append(url)
        try:
            # Without a timeout requests may block indefinitely on a dead peer.
            response = requests.get(url, timeout=requestTimeout)
        except requests.RequestException as error:
            sys.stderr.write('skipped %s: %s\n' % (host, error))
            continue
        chunks.append(formatEntries(host, extractIps(response.text)))
    return ''.join(chunks)


if __name__ == '__main__':
    outputString = collectEntries(hosts)
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(outputString)
效果:
image.png
意义:
这样就不用手动去查这些常访问网站的 IP 了,直接把脚本的输出结果粘贴到 hosts 文件里即可。
刷新 DNS 缓存(macOS):
# Runs three commands in sequence: dscacheutil with -flushcache, a HUP signal to mDNSResponder, then `say flushed` to speak the word "flushed".
sudo dscacheutil -flushcache;sudo killall -HUP mDNSResponder;say flushed