Python

Google Image Download By Python

2017-12-05  本文已影响0人  Colin_0463

Dear all:

1. Python Version and Libraries

1.1 Version

1.2 Libraries

pip install selenium

2. Full Code

import os
import sys
import time
import urllib
import urllib.parse
import urllib.request

from selenium import webdriver
from selenium.common.exceptions import InvalidElementStateException
from selenium.common.exceptions import MoveTargetOutOfBoundsException
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

# Google Images search URL, assembled as prefix + search term + fixed
# query parameters.
Url_Behind = "https://www.google.co.jp/search?q="

# The search term is the first command-line argument. Exit with a usage
# hint instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit("Usage: python filename.py <search term>")
Url_SearchTerm = sys.argv[1]
Url_After = "&newwindow=1&source=lnms&tbm=isch&sa=X&ved=0ahUKEwjTqs6EwPDXAhXCa7wKHQzNAn0Q_AUICigB&biw=1920&bih=1069"

# Percent-encode the term so that spaces, '&', '#' and non-ASCII
# characters cannot corrupt the query string.
searchUrl = Url_Behind + urllib.parse.quote_plus(Url_SearchTerm) + Url_After
print(searchUrl)

# Location of the ChromeDriver executable (see the "Usage" section of
# this article for how to obtain it).
path = 'D:\\DATA\\chromedriver.exe'

# Directory that receives the downloaded pictures; adjust as needed.
# It is created up front because urlretrieve() does not create missing
# directories.
save_dir = "D:\\DATA\\PicCrawler\\"
os.makedirs(save_dir, exist_ok=True)

# Start Chrome.
# NOTE(review): passing the driver path positionally is the Selenium 3
# calling convention; newer Selenium releases may require a Service
# object instead -- confirm against the installed version.
driver = webdriver.Chrome(path)

try:
    # Maximize the window before loading the results page.
    driver.maximize_window()

    # Run the search.
    driver.get(searchUrl)

    # Current vertical scroll offset in pixels.
    pos = 0

    # Running number of the downloaded picture; used in the file name.
    m = 0

    # Image URLs already downloaded. A set keeps the duplicate check
    # cheap no matter how many images have been seen.
    seen_img_urls = set()

    for i in range(20):
        # Scroll further down so the page loads more thumbnails. The
        # offset grows by i*500 on every pass, so later passes jump
        # further than earlier ones.
        pos += i * 500
        driver.execute_script("document.documentElement.scrollTop=%d" % pos)
        time.sleep(1)

        # Inspect every <img> element currently present in the page.
        for element in driver.find_elements(By.TAG_NAME, "img"):
            img_url = element.get_attribute('src')

            # Only thumbnails whose src starts with "https://encrypted"
            # are downloaded; elements without a string src are skipped.
            if not isinstance(img_url, str):
                continue
            if not img_url.startswith("https://encrypted"):
                continue
            if img_url in seen_img_urls:
                continue

            seen_img_urls.add(img_url)
            m += 1

            # File name: <save_dir><search term><running number>.jpg
            filename = save_dir + Url_SearchTerm + str(m) + ".jpg"
            urllib.request.urlretrieve(img_url, filename)
            print("Save Picture %s" % filename)

        # Click the "show more images" button so that further results
        # are loaded. The button may be absent or not yet clickable on a
        # given pass; that is expected, so keep scrolling instead of
        # aborting the whole download run.
        try:
            click_btn = driver.find_element(By.ID, 'smb')
            ActionChains(driver).click(click_btn).perform()
        except (NoSuchElementException,
                InvalidElementStateException,
                MoveTargetOutOfBoundsException):
            continue
finally:
    # Always shut the browser session down, even after an error, so no
    # Chrome / chromedriver process is left running.
    driver.quit()

3. Usage

3.1 Download Chrome driver

path = 'D:\\DATA\\chromedriver.exe'

3.2 Install selenium library

pip install selenium

3.3 Start Download

python filename.py soccer

4. Discussion

Sorry for being absent from our discussion; I hope this helps.

上一篇 下一篇

猜你喜欢

热点阅读