通过图片搜索引擎自动下载图片

2023-07-01  本文已影响0人  孙庚辛
# coding=utf8
import json
import os
import time
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def download_images(keyword, num_images):
    """Download up to ``num_images`` Bing image-search results for ``keyword``.

    Opens the Bing image search page in a Selenium-driven Chrome browser,
    scrolls to the bottom of the page ten times so more results get loaded,
    then parses the page source for ``<a class="iusc">`` tags. Each tag's
    ``m`` attribute is decoded as JSON and its ``murl`` entry is fetched
    with ``requests``.

    Images are written to ``<keyword>/<keyword>_<i>.jpg``, where ``i`` is the
    index of the tag in the result list. Numbering may have gaps when a
    download is skipped, and the ``.jpg`` suffix is used regardless of the
    actual image format. The directory is created if it does not exist.

    Downloading is best effort: a result whose metadata cannot be parsed,
    whose request fails or does not return HTTP 200, or whose file cannot be
    written is skipped.

    Args:
        keyword: Search term; also used as the output directory name and
            file-name prefix.
        num_images: Maximum number of images to save. Values <= 0 save
            nothing and return immediately without starting a browser.

    Returns:
        The number of images actually saved.
    """
    if num_images <= 0:
        return 0

    # Percent-encode the term so characters such as '&', '#' or spaces
    # cannot corrupt the query string.
    url = f"https://www.bing.com/images/search?q={quote_plus(keyword)}"
    driver = webdriver.Chrome()  # swap in another webdriver class to use a different browser
    try:
        driver.get(url)

        wait = WebDriverWait(driver, 10)
        wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

        # Scroll down several times to load more images
        for _ in range(10):
            try:
                time.sleep(2)  # wait for 2 seconds
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(1)  # pause between scrolls
            except WebDriverException:
                continue

        html = driver.page_source
    finally:
        # Only the page source is needed from here on, so release the
        # browser now -- and do so even if loading the page failed.
        driver.quit()

    soup = BeautifulSoup(html, "html.parser")
    img_tags = soup.find_all("a", {"class": "iusc"})

    os.makedirs(keyword, exist_ok=True)

    count = 0
    for i, img in enumerate(img_tags):
        try:
            img_url = json.loads(img["m"])["murl"]
            # A timeout keeps one unresponsive host from hanging the run.
            response = requests.get(img_url, timeout=10)
            if response.status_code != 200:
                continue
            with open(os.path.join(keyword, f"{keyword}_{i}.jpg"), "wb") as out_file:
                out_file.write(response.content)
        except (KeyError, TypeError, ValueError, OSError, requests.RequestException):
            # Deliberate best effort: skip results with missing/malformed
            # metadata, network failures, or write errors.
            continue
        count += 1
        if count >= num_images:
            break

    return count

# Script entry: fetch 20 images for the search term "关键词" (Chinese for
# "keyword"); replace it with the term you actually want to search for.
download_images(keyword="关键词", num_images=20)
上一篇 下一篇

猜你喜欢

热点阅读