Python
2017-05-19 本文已影响0人
星云之水
1.由KB号取得KB的中文/日语名称
正则,文件读写,爬虫(代理)
python2.7测试通过
将要查询的KB号逐行写入文件kb.txt(每行一个),运行脚本后在kbnum.txt中查看结果
# -*- coding:utf-8 -*-
import urllib
import urllib2
import re,sys,os
reload(sys) # part of the UTF-8 setup below, to avoid garbled text
sys.setdefaultencoding('utf8') # make UTF-8 the default encoding, to avoid garbled text
def list(url):
    """Fetch the page at *url* and append the headings it contains to kbnum.txt.

    The response body is searched for every ``"heading": "...",`` field.
    Each match is appended to ``kbnum.txt`` as one line made of the KB
    number, a tab, the heading text and a newline.  The KB number is taken
    from the last path segment of *url*.

    NOTE: the function name shadows the builtin ``list``; it is kept because
    other code in this file calls it by this name.

    Args:
        url: Address of the article to fetch.  Leading/trailing whitespace
            (for example the newline kept from a line read out of a file)
            is ignored.

    Raises:
        urllib2.URLError: If the page cannot be fetched.
        IOError: If ``kbnum.txt`` cannot be opened for appending.
    """
    # Drop stray whitespace so the request is never made with an embedded
    # newline, and so the KB number does not depend on a fixed-width slice.
    url = url.strip()
    kb_number = url.rsplit('/', 1)[-1]

    request = urllib2.Request(
        url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3095.5 Safari/537.36',
            # Ask for Japanese content; 'zh-CN,zh;q=0.8' was the alternative
            # value noted in the original script.
            'Accept-Language': 'ja-JP',
        })
    response = urllib2.urlopen(request)
    try:
        page_code = response.read()
    finally:
        response.close()

    # re.S lets a heading that spans several lines still match.
    pattern = re.compile('"heading": "(.*?)",', re.S)
    headings = pattern.findall(page_code)

    # The file is opened only after the download succeeded and is closed by
    # the context manager even if a write fails.  The matched byte strings
    # are written unchanged, so no decode/encode round trip (and no reliance
    # on the interpreter's default encoding) is needed.
    with open('kbnum.txt', 'a') as out:
        for heading in headings:
            out.write(kb_number + "\t" + heading + "\n")
def filelist():
    """Rebuild kbnum.txt from the KB numbers listed in kb.txt.

    Any existing ``kbnum.txt`` is removed first, then ``list()`` is called
    once for every non-blank line of ``kb.txt`` with the article URL built
    from that line.

    Raises:
        IOError: If ``kb.txt`` cannot be opened.
    """
    # Start from a clean output file; on the first run there is nothing to
    # remove, which must not abort the script.
    if os.path.exists('kbnum.txt'):
        os.remove('kbnum.txt')

    with open('kb.txt') as kb_file:
        for line in kb_file:
            kb_number = line.strip()
            if not kb_number:
                # Blank lines would only produce a request for a bogus URL.
                continue
            # Normalise every entry to "number + newline" so each URL handed
            # to list() has the same shape, regardless of stray whitespace
            # or a missing newline on the last line of kb.txt.
            url1 = "https://support.microsoft.com/api/content/help/" + kb_number + "\n"
            list(url1)
# Run the whole kb.txt -> kbnum.txt conversion only when executed as a script.
if __name__ == '__main__':
    filelist()
fdfdf