Python

2017-05-19  本文已影响0人  星云之水

1.由KB号取得KB的中文/日语名称

正则,文件读写,爬虫(代理)
python2.7测试通过
将要查询的KB号逐行写入文件kb.txt,运行脚本后在kbnum.txt中查看结果

# -*- coding:utf-8 -*-
import urllib
import urllib2
import re,sys,os
# Python 2-only hack: reload(sys) restores sys.setdefaultencoding (which
# site.py removes at startup) so the implicit str<->unicode codec can be
# switched from ASCII to UTF-8 for the rest of the script.
reload(sys)                     # set UTF-8 to avoid garbled text
sys.setdefaultencoding('utf8')  # set UTF-8 to avoid garbled text
def list(url):
    """Fetch one KB article and append its heading(s) to ``kbnum.txt``.

    The page at *url* is downloaded and every ``"heading": "...",`` value
    found in the response body is appended to ``kbnum.txt`` as one
    ``<KB id>\\t<heading>\\n`` record, encoded as UTF-8.

    NOTE: the function name shadows the ``list`` builtin; it is kept
    unchanged because callers refer to it by this name.

    Args:
        url: Article URL whose last path segment is the KB id. Surrounding
            whitespace (for example the line terminator left on a line read
            from a file) is ignored.

    Raises:
        urllib2.URLError: if the page cannot be fetched.
        UnicodeDecodeError: if a heading is not valid UTF-8.
    """
    # Callers pass lines read straight from a file, so the URL may still end
    # in "\n". Strip it before issuing the request: control characters in a
    # URL are rejected by newer Python 2.7 releases.
    url = url.strip()
    # Use the last path segment rather than a fixed-width slice so that KB
    # ids of any length are extracted correctly.
    kb_id = url.rsplit('/', 1)[-1]

    request = urllib2.Request(url, headers={
        # The header value must not repeat the "User-Agent:" header name.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3095.5 Safari/537.36',
        # Presumably selects the language of the returned heading; the
        # original author noted 'zh-CN,zh;q=0.8' as the alternative value.
        'Accept-Language': 'ja-JP',
    })
    response = urllib2.urlopen(request)
    try:
        page_code = response.read()
    finally:
        response.close()

    # re.S lets "." span newlines inside a heading value.
    headings = re.findall('"heading": "(.*?)",', page_code, re.S)

    with open('kbnum.txt', 'a') as out:
        for heading in headings:
            record = u"%s\t%s\n" % (kb_id, heading.decode('utf-8'))
            # Encode explicitly instead of relying on the process-wide
            # default encoding.
            out.write(record.encode('utf-8'))

def filelist():
    """Rebuild ``kbnum.txt`` from the KB numbers listed in ``kb.txt``.

    Any existing ``kbnum.txt`` is deleted, then ``list()`` is called once
    for every non-blank line of ``kb.txt`` with the matching
    support.microsoft.com content URL.

    Raises:
        IOError: if ``kb.txt`` cannot be opened.
        OSError: if ``kbnum.txt`` exists but cannot be removed.
    """
    import errno

    # Start from a clean output file; a missing file (e.g. on the very first
    # run) is not an error.
    try:
        os.remove('kbnum.txt')
    except OSError as err:
        if err.errno != errno.ENOENT:
            raise

    with open('kb.txt') as kb_file:
        for line in kb_file:
            kb_number = line.strip()
            if not kb_number:
                # Skip blank lines instead of requesting a bogus URL.
                continue
            # Pass the URL with exactly one trailing newline, which is the
            # form list() has always been called with; normalising it here
            # makes a final line that lacks a newline behave like the rest.
            url1 = "https://support.microsoft.com/api/content/help/" + kb_number + "\n"
            list(url1)
        
# Script entry point: only process kb.txt when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    filelist()
fdfdf
上一篇下一篇

猜你喜欢

热点阅读