Python爬虫之爬取不得姐图片
2017-08-17 本文已影响45人
weizujie
上一个爬段子的爬虫,我们把它稍作修改就可以用来爬图片了。
贴代码:
import requests
from bs4 import BeautifulSoup
import re
import os
# Module-level accumulator of image URLs: get_images() appends to it and
# save_img() iterates over it to download each entry.
images = []
def get_images(url, timeout=10):
    """Collect image URLs from one listing page into the module-level ``images``.

    The page is fetched with a mobile-browser User-Agent, parsed with
    BeautifulSoup/lxml, and every ``<img>`` inside the ``div.j-r-list``
    container contributes its ``data-original`` attribute.

    Args:
        url: Address of the listing page to scrape.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The module-level ``images`` list (the same object on every call),
        extended with the URLs found on this page. It is returned unchanged
        when the page has no ``div.j-r-list`` container.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/60.0.3112.90 Mobile Safari/537.36'
        )
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')

    container = soup.find('div', {'class': 'j-r-list'})
    if container is None:
        # Layout changed or the page is empty: nothing to collect here.
        return images

    for img in container.find_all('img'):
        # Some <img> tags may lack the lazy-load attribute; skip those
        # rather than raising KeyError and losing the rest of the page.
        src = img.get('data-original')
        if src:
            images.append(src)
    return images
def save_img(dest_dir='.', timeout=10):
    """Download every URL in the module-level ``images`` list to disk.

    Files are named ``<index>.jpg`` where ``index`` is the URL's position in
    ``images``. A URL that cannot be fetched is reported and skipped so one
    bad link does not abort the remaining downloads.

    Args:
        dest_dir: Directory that receives the files. Defaults to the current
            working directory; it is created if it does not exist.
        timeout: Seconds to wait for each image request before giving up.

    Returns:
        None.
    """
    os.makedirs(dest_dir, exist_ok=True)
    for index, img_url in enumerate(images):
        try:
            response = requests.get(img_url, timeout=timeout)
            # Do not write an HTTP error body to disk as if it were an image.
            response.raise_for_status()
        except requests.RequestException as exc:
            print('下载失败,已跳过: %s (%s)' % (img_url, exc))
            continue
        # NOTE(review): every file gets a .jpg suffix regardless of the
        # actual format of the downloaded image.
        target = os.path.join(dest_dir, str(index) + '.jpg')
        with open(target, 'wb') as f:
            f.write(response.content)
def main():
    """Ask for a page count, gather image URLs from each page, then download.

    Pages ``1..N`` under the base URL are scraped with ``get_images``; a page
    that fails is reported and skipped. Once all pages have been visited,
    ``save_img`` downloads the collected images and the completion message
    is printed.
    """
    url = 'http://www.budejie.com/pic/'
    try:
        endpage = int(input('请输入要下载的页数:'))
    except ValueError:
        # Non-numeric input: explain and stop instead of dumping a traceback.
        print('页数必须是整数')
        return

    for i in range(1, endpage + 1):
        try:
            get_images(url + str(i))
        except Exception as exc:
            # Top-level boundary: keep going with the other pages, but make
            # the failure visible (a bare ``except`` would also swallow
            # KeyboardInterrupt and hide every error).
            print('第%d页获取失败: %s' % (i, exc))
        else:
            print('正在下载第%d页的图片' % i)

    # Download first, then announce completion.
    save_img()
    print('下载完成')
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()