2019-01-26 基金项目标题一键式下载
2019-01-26 本文已影响0人
快乐的夏天_eaf9
本文为日常随笔,仅是自己日常工作的随手一记,并非系统性的知识介绍。如有不慎浏览至此的同学,可以屏蔽。
import os
from docx import Document
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Scrape the listing on fund.sciencenet.cn page by page and append the text of
# every element with class "t" to a Word document as a bold paragraph.
os.chdir(r'C:\Users\MTC\Desktop\New')

# Create an empty document on first run so it can be opened (and appended to) below.
if not os.path.exists(r'基金.docx'):
    document = Document()
    document.save(r'基金.docx')

browser = webdriver.Chrome()
try:
    browser.get('http://fund.sciencenet.cn/')
    document = Document(r'基金.docx')
    Num = 38  # maximum number of listing pages to walk through
    Wait = WebDriverWait(browser, 20)  # reused for every page transition

    for page in range(1, Num + 1):
        # Selenium 4 removed find_elements_by_class_name; use the By locator form.
        b = browser.find_elements(By.CLASS_NAME, 't')

        # The first match is skipped, exactly as the original script did
        # (presumably a header rather than a project title -- TODO confirm).
        for title in b[1:]:
            text = title.text
            print(text)
            # Add the run explicitly: add_paragraph(text) creates no run for
            # empty text, which made the former `paragraph.runs[0]` raise
            # IndexError.
            document.add_paragraph().add_run(text).bold = True

        # Persist after each page so an error on a later page loses little work.
        document.save(r'基金.docx')

        # Locate and click the "next page" button.
        bnts = browser.find_elements(By.CLASS_NAME, 'btn')
        for iii in bnts:
            if iii.text == '下一页':
                print(iii.text)
                iii.click()
                break
        else:
            # No "next page" button: stop instead of re-scraping the same page.
            break

        # NOTE(review): this only waits for the footer element to be present;
        # it may already exist before the new page finishes loading -- confirm
        # whether a stronger condition is needed for this site.
        Wait.until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="ft"]/span[3]')))
finally:
    # Always release the browser/driver process, even if scraping fails.
    browser.quit()
2019-1-26
办公室散记