爬取课程试题
2021-05-18 本文已影响0人
Noza_ea8f
import sys

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

from lib import user_info
def wait_key():
    """Block until the operator confirms on the console.

    The prompt is repeated until one of the two recognised inputs arrives:

    * a single space -- return to the caller so the script can continue;
    * ``q`` -- terminate the process through ``sys.exit(0)``.

    Any other input is echoed back and the prompt is shown again.

    Raises:
        SystemExit: when the operator enters ``q``.
    """
    while True:
        message = input('核对无误请输入“空格”:')
        if message == ' ':
            return
        if message == 'q':
            sys.exit(0)
        # Unrecognised input: show what was typed, then ask again.
        print(f'你输入的是 {message}!')
# URL to open.
# Alternative entry point: "http://passport.ouchn.cn"
base_url = "http://thome.ouchn.cn/"

# Launch the browser (webdriver.Firefox() can be substituted here).
driver = webdriver.Chrome()
# Element lookups wait up to 10 seconds before giving up.
driver.implicitly_wait(10)

# Clear all cookies, then load the page.
driver.delete_all_cookies()
driver.get(base_url)
print(driver.get_cookies())

# Enter the user name and password.
# find_element(By.…, …) is used because the find_element_by_* helpers were
# removed in Selenium 4.3; this form works on Selenium 3 as well.
driver.find_element(By.ID, 'username').send_keys(user_info.username)
driver.find_element(By.ID, 'password').send_keys(user_info.password)

# Log in.
driver.find_element(
    By.CSS_SELECTOR,
    'body > div > div > div > form > div > div > div:nth-child(4) > button',
).click()

# Click the "enter" link.
driver.find_element(
    By.CSS_SELECTOR,
    '.teachtable > table:nth-child(1) > tbody:nth-child(1) > tr:nth-child(4) > td:nth-child(5) > a:nth-child(1)',
).click()
def crawl_data():
    """Collect question and answer text from the newest browser window.

    Switches the module-level ``driver`` to the last entry of its window
    handles, then reads the text of every ``div`` whose class attribute is
    ``qtext`` followed by every ``div`` whose class attribute is
    ``rightanswer``.

    Returns:
        pandas.DataFrame: one column named '试题' that holds all question
        texts first and all answer texts after them.
    """
    # The most recently opened window is the last handle in the list.
    driver.switch_to.window(driver.window_handles[-1])

    # find_elements(By.XPATH, …) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3; this form works on Selenium 3 as well.
    questions = driver.find_elements(By.XPATH, '//div[@class="qtext"]')
    texts = [element.text for element in questions]

    answers = driver.find_elements(By.XPATH, '//div[@class="rightanswer"]')
    # NOTE(review): answers go into the same column as the questions, so the
    # two element counts do not have to match -- confirm that a paired
    # question/answer layout is not wanted.
    texts.extend(element.text for element in answers)

    return pd.DataFrame({'试题': texts})
# Wait for the operator to choose the target page by hand
# (formative assessment task 2) before scraping starts.
wait_key()
df = crawl_data()

# The context manager saves and closes the workbook on exit, including when
# to_excel raises; ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('xx.xlsx') as writer:
    df.to_excel(writer, index=False)