无标题文章【Python爬虫】- 第13次作业

2017-09-06 本文已影响5人 Allen_Zhao

# hw_13
# Fetch the ygdy8.com home page, print it, and save it locally as an HTML file.

import requests

url = 'http://www.ygdy8.com/'

# Browser-like request headers copied from a Chrome session.
# The conditional-request headers from that session ('If-Modified-Since' and
# 'If-None-Match') are deliberately omitted: they allow the server to reply
# "304 Not Modified" with an empty body, which would leave nothing to save.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'DNT': '1',
    'Host': 'www.ygdy8.com',
    'Referer': 'https://www.google.com/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
}

# requests has no default timeout; without one an unresponsive server would
# block this script forever.
req = requests.get(url, headers=headers, timeout=10)

status_code = req.status_code
print(status_code)

# Force GB2312 decoding of the response body before reading it as text.
req.encoding = 'gb2312'
html = req.text
print(html)

# Save the fetched page to the target folder as an HTML file (UTF-8 on disk).
# The `with` block guarantees the file is flushed and closed; the previous
# `fp.close` (no parentheses) only referenced the method and never called it.
with open(r'C:\Users\haoran\Desktop\Python study\dy.html', 'w', encoding='utf-8') as fp:
    fp.write(html)