fetch google arts and culture sm
2019-07-05 本文已影响0人
狼无雨雪
获取 Google Arts and Culture 的 Ukiyo 风格的小图程序
"""
Script actually used to fetch the image URLs from https://artsandculture.google.com/entity/m0bwbv?categoryid=art-movement
"""
from selenium import webdriver
import time
import os
import re
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
# os.environ["PATH"] += os.pathsep + 'D:\google-art-downloader-master'
# Entity page whose "popular" carousel is scraped.
ENTITY_URL = 'https://artsandculture.google.com/entity/m0bwbv?categoryid=art-movement'
# Element that is clicked to advance the carousel; the loop stops once its
# ``aria-hidden`` attribute becomes "true".
NEXT_BUTTON_XPATH = '//*[@id="exp_tab_popular"]/div/div/div[2]'
# CSS class string identifying the anchors whose inline style holds the URL.
THUMBNAIL_CLASS = "e0WtYb kdYEFe ZEnmnd PJLMUc"
# File that receives one extracted URL per line.
OUTPUT_PATH = "google_arts_images_urls.txt"
# Captures everything between the first and the last double quote of a
# style attribute (greedy, DOTALL) -- same pattern the script always used.
QUOTED_URL_RE = re.compile(r"\"(.+)\"", re.S)


def extract_image_url(style):
    """Return the double-quoted part of an inline ``style`` value.

    Args:
        style: The raw ``style`` attribute of an anchor, or ``None`` when the
            anchor has no such attribute.

    Returns:
        The text between the first and last double quote, or ``None`` when
        *style* is empty/missing or contains no quoted section.
    """
    if not style:
        return None
    match = QUOTED_URL_RE.search(style)
    return match.group(1) if match else None


def collect_styles(page_source, styles):
    """Add the ``style`` attribute of every thumbnail anchor to *styles*.

    Args:
        page_source: HTML of the currently rendered page.
        styles: Set that is updated in place; anchors without a ``style``
            attribute are skipped.
    """
    soup = BeautifulSoup(page_source, 'lxml')
    for anchor in soup.find_all('a', {'class': THUMBNAIL_CLASS}):
        style = anchor.get("style")
        if style:
            styles.add(style)


def main():
    """Page through the carousel and write the image URLs to OUTPUT_PATH."""
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    # ``options=`` replaces the ``chrome_options=`` keyword, which newer
    # Selenium releases no longer accept.
    browser = webdriver.Chrome(options=chrome_options)
    styles = set()
    try:
        browser.get(ENTITY_URL)
        while True:
            # Scrape *before* testing the button so the view shown after the
            # final click is collected as well.
            collect_styles(browser.page_source, styles)
            print(len(styles))
            # ``find_element("xpath", ...)`` is the locator form that works
            # on both Selenium 3 and 4 (``find_element_by_xpath`` was removed).
            next_button = browser.find_element("xpath", NEXT_BUTTON_XPATH)
            if next_button.get_attribute("aria-hidden") == "true":
                break
            next_button.click()
            # Give the page time to render the next batch of thumbnails.
            time.sleep(2)
    finally:
        # Always shut the driver down, even when scraping fails midway.
        browser.quit()

    google_arts_images_urls = {
        url for url in map(extract_image_url, styles) if url
    }
    with open(OUTPUT_PATH, 'w', encoding="utf8") as write_file:
        # Sorted so repeated runs produce a stable, diff-friendly file.
        write_file.writelines(
            url + "\n" for url in sorted(google_arts_images_urls)
        )


if __name__ == "__main__":
    main()
url : https://artsandculture.google.com/entity/m0bwbv?categoryid=art-movement
