Python 爬取表情包
从斗图啦网站(www.doutula.com)爬取表情包
import os
import time

import requests
from lxml import etree
# Download every image referenced on list pages 1-5 of doutula.com into the
# local 'imgs/' directory, printing each image URL and the filename it was
# saved under.

# The headers are identical for every request, so build them once instead of
# once per page.
headers = {
    'Referer': 'https://www.doutula.com/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
}

# Seconds to wait on a request before giving up; without a timeout
# requests.get() can block forever on an unresponsive server.
timeout = 10

# open() does not create missing directories, so make sure the target exists
# before the first image is written.
os.makedirs('imgs', exist_ok=True)

for i in range(1, 6):
    url = 'https://www.doutula.com/article/list/?page={}'.format(i)
    try:
        resp = requests.get(url, headers=headers, timeout=timeout)
        resp.raise_for_status()
    except requests.RequestException as err:
        # One unreachable page should not abort the remaining pages.
        print('skip page', url, err)
        continue

    html = etree.HTML(resp.text)
    # Collect the data-original attribute of every <img> element on the page.
    srcs = html.xpath('//img/@data-original')
    for src in srcs:
        # The last path segment of the image URL becomes the local file name.
        filename = src.split('/')[-1]
        if not filename:
            # A URL ending in '/' has no file name; 'imgs/' alone is a
            # directory and cannot be opened for writing.
            continue
        try:
            img = requests.get(src, headers=headers, timeout=timeout)
            # Reject 4xx/5xx responses so an error page is never saved as
            # if it were image data.
            img.raise_for_status()
        except requests.RequestException as err:
            print('skip image', src, err)
            continue
        # 'wb': the response body is raw bytes, not text.
        with open('imgs/' + filename, 'wb') as file:
            file.write(img.content)
        print(src, filename)
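本例用到的新知识点:Referer 请求头;'wb'(以二进制写入模式打开文件);'imgs/'+filename(拼接保存路径);img.content(响应的二进制内容);srcs = html.xpath('//img/@data-original')(用 XPath 取出所有 img 标签的 data-original 属性);filename = src.split('/')[-1](取 URL 的最后一段作为文件名)。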