通过图片搜索引擎自动下载图片
2023-07-01 本文已影响0人
孙庚辛
# coding=utf8
import json
import os
import time
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def download_images(keyword: str, num_images: int, *,
                    scroll_rounds: int = 10, timeout: float = 10) -> None:
    """Download up to ``num_images`` Bing image-search results for ``keyword``.

    A Chrome browser is driven through Selenium to open the Bing image
    search page and scroll it so that more results get loaded.  The rendered
    HTML is then parsed for ``<a class="iusc">`` elements, whose ``m``
    attribute holds a JSON object with the full-size image URL under
    ``murl``.  Each image that answers with HTTP 200 is written to
    ``<keyword>/<keyword>_<index>.jpg``, where ``index`` is the position of
    the anchor in the page (so indices may have gaps when a download is
    skipped).  Files always get a ``.jpg`` suffix regardless of the actual
    image format.

    Args:
        keyword: Search term; also used as the output directory name and
            as the file-name prefix.
        num_images: Maximum number of images to save.  Values ``<= 0``
            return immediately without starting a browser.
        scroll_rounds: How many times to scroll to the bottom of the page
            to trigger loading of additional results.
        timeout: Seconds to wait for each image HTTP request before
            giving up on that image.

    Returns:
        None.  Downloading is best-effort: images that cannot be resolved,
        fetched, or written are skipped.
    """
    if num_images <= 0:
        # Nothing was requested, so do not launch a browser at all.
        return

    # Encode the keyword so spaces, '&', '#', and non-ASCII text cannot
    # corrupt the query string.
    url = f"https://www.bing.com/images/search?q={quote_plus(keyword)}"

    driver = webdriver.Chrome()  # or webdriver.Firefox(), depending on your browser
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        # Scroll down several times to load more images.
        for _ in range(scroll_rounds):
            try:
                time.sleep(2)  # give the page time to render new results
                driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);"
                )
                time.sleep(1)  # pause between scrolls
            except WebDriverException:
                # A failed scroll is not fatal; try the next round.
                continue
        html = driver.page_source
    finally:
        # Always shut the browser down, even when loading the page failed;
        # it is no longer needed once the HTML has been captured.
        driver.quit()

    soup = BeautifulSoup(html, "html.parser")
    img_tags = soup.find_all("a", {"class": "iusc"})
    os.makedirs(keyword, exist_ok=True)

    count = 0
    for i, img in enumerate(img_tags):
        try:
            img_url = json.loads(img["m"])["murl"]
            response = requests.get(img_url, timeout=timeout)
            if response.status_code != 200:
                continue
            target = os.path.join(keyword, f"{keyword}_{i}.jpg")
            with open(target, "wb") as out_file:
                out_file.write(response.content)
        except (KeyError, TypeError, ValueError, OSError,
                requests.RequestException):
            # Missing/malformed metadata, network failure, or write error:
            # skip this image but let KeyboardInterrupt and real bugs
            # propagate instead of hiding them behind a bare ``except``.
            continue
        count += 1
        if count >= num_images:
            break
# Run the example download only when this file is executed as a script, so
# that importing the module does not launch a browser as a side effect.
if __name__ == "__main__":
    download_images("关键词", 20)