动图爬取
2019-04-24 本文已影响0人
江湖有爱
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/4/21 14:24
# @Author : Xinru
import os
import re
import threading
import time
from urllib.parse import urljoin

import requests
from lxml import etree
# Directory (relative to the current working directory) that receives every
# downloaded image. download() reads this name, so it is kept as spelled.
new_flie = '动态图'
# exist_ok=True replaces the os.path.exists + os.mkdir pair, which could fail
# if the directory appeared between the check and the creation.
os.makedirs(new_flie, exist_ok=True)
def download(filename: str, img: str) -> None:
    """Fetch one image URL and save it inside the ``new_flie`` directory.

    Args:
        filename: Name to store the file under. Characters that cannot
            appear in a file name (path separators, ``:*?"<>|`` and
            control whitespace) are replaced with ``_``.
        img: URL of the image to fetch.

    Network and file-system failures are reported on stdout and then
    swallowed, so a single bad image does not stop the caller's loop.
    """
    # The name is scraped from a web page: neutralise path separators and
    # other characters that are not valid in file names before using it.
    safe_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', filename).strip()
    try:
        # timeout is (connect, read) in seconds.
        response = requests.get(img, timeout=(3, 7))
        # Do not store an HTTP error page as if it were the image.
        response.raise_for_status()
        # The with-statement closes the file; no explicit close() is needed.
        with open(os.path.join(new_flie, safe_name), 'wb') as f:
            f.write(response.content)
    except requests.RequestException:
        print('error', 'URL打不开', img)
    except OSError:
        # Writing failed (bad name, full disk, ...); the URL itself was fine.
        print('error', '文件保存失败', safe_name)
    else:
        print(safe_name)
def main(url: str, ii: int) -> None:
    """Download every image listed on one index page and report the time taken.

    Args:
        url: Address of the list page to scrape.
        ii: Page number; used only in the progress message.

    If the page itself cannot be fetched, an error line is printed and the
    function returns without downloading anything.
    """
    start_time = time.time()
    try:
        # Same (connect, read) timeout as the image requests; without one a
        # stalled server would block the crawl indefinitely.
        html = requests.get(url, timeout=(3, 7))
        html.raise_for_status()
    except requests.RequestException:
        print('error', 'URL打不开', url)
        return
    # The response is decoded as gb2312 before parsing.
    html.encoding = 'gb2312'
    div = etree.HTML(html.text)
    imgs = div.xpath('//div[@class="text"]/p/img/@src')
    names = div.xpath('//div[@class="item"]/h3/a/b/text()')
    # NOTE(review): pairing relies on both XPath results having the same
    # order and length; zip() silently drops unmatched extras. Confirm
    # against the page layout.
    for src, name in zip(imgs, names):
        # urljoin resolves root-relative, page-relative and already-absolute
        # src values; plain concatenation only handled the root-relative case.
        img = urljoin(url, src)
        # Reuse the extension of the image URL for the saved file.
        filename = name + '.' + img.split('.')[-1]
        download(filename, img)
    end_time = time.time()
    print('第-----' + str(ii) + '-----页下载完毕', round(end_time - start_time, 1), '秒')
if __name__ == '__main__':
    # List pages to crawl: first_page is inclusive, last_page is exclusive.
    first_page, last_page = 34, 2527
    # Pages are processed one after another, each announced before it starts.
    for ii in range(first_page, last_page):
        url = 'https://www.zbjuran.com/dongtai/list_4_' + str(ii) + '.html'
        print('第-----%d-----页 准备' % ii)
        main(url, ii)