# 爬取课程试题 (Scrape course exam questions)
#
# 2021-05-18  本文已影响0人  Noza_ea8f
import sys

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

from lib import user_info


def wait_key():
    """Block until the operator confirms by entering a single space.

    Prompts on stdin repeatedly. Entering exactly one space returns
    control to the caller; entering ``q`` terminates the program via
    ``sys.exit(0)``. Any other input is echoed back and the prompt is
    shown again.
    """
    prompt = '核对无误请输入“空格”:'
    # iter(callable, sentinel) keeps calling input() until it yields ' '.
    for reply in iter(lambda: input(prompt), ' '):
        if reply == 'q':
            sys.exit(0)
        print('你输入的是 ' + reply + '!')


# URL that the browser opens first; the login form is filled in on this page.
# (Alternative entry point: http://passport.ouchn.cn)
base_url = "http://thome.ouchn.cn/"

# Launch the browser (webdriver.Firefox() can be used instead of Chrome).
driver = webdriver.Chrome()
# Let element lookups wait up to 10 seconds before failing.
driver.implicitly_wait(10)

# Clear all cookies, load the page, then print the cookies it set.
driver.delete_all_cookies()
driver.get(base_url)
print(driver.get_cookies())

# Enter the user name and password kept in lib.user_info.
# The find_element_by_* helpers were removed in Selenium 4.3; the generic
# find_element(By.<strategy>, value) form works on old and new releases.
driver.find_element(By.ID, 'username').send_keys(user_info.username)
driver.find_element(By.ID, 'password').send_keys(user_info.password)

# Log in by clicking the form's button.
driver.find_element(
    By.CSS_SELECTOR,
    'body > div > div > div > form > div > div > div:nth-child(4) > button',
).click()

# Click the "进入" (enter) link: row 4, column 5 of the first table inside
# the .teachtable container.
driver.find_element(
    By.CSS_SELECTOR,
    '.teachtable > table:nth-child(1) > tbody:nth-child(1) > tr:nth-child(4) > td:nth-child(5) > a:nth-child(1)',
).click()


def crawl_data():
    """Collect question and answer texts from the newest browser window.

    Switches the module-level ``driver`` to the last window handle, then
    reads the text of every ``div`` whose class attribute is exactly
    ``qtext`` (questions), followed by every ``div`` whose class attribute
    is exactly ``rightanswer`` (answers).

    Returns:
        pandas.DataFrame: a single column named ``试题`` that holds all
        question texts first and all answer texts after them.
    """
    question_xpath = '//div[@class="qtext"]'
    answer_xpath = '//div[@class="rightanswer"]'

    # Work in the last window handle, which is taken to be the newest one.
    driver.switch_to.window(driver.window_handles[-1])

    # find_elements_by_xpath was removed in Selenium 4.3; the generic
    # find_elements(By.XPATH, ...) form works on old and new releases.
    texts = [
        element.text
        for element in driver.find_elements(By.XPATH, question_xpath)
    ]
    # Answers go into the same column, after the questions.
    texts.extend(
        element.text
        for element in driver.find_elements(By.XPATH, answer_xpath)
    )
    return pd.DataFrame({'试题': texts})


# Wait for the operator to select the target page by hand
# (e.g. "形考任务2"), then scrape it.
wait_key()
df = crawl_data()

# Write the result to xx.xlsx. ExcelWriter.save() was removed in pandas 2.0;
# the context manager saves and closes the workbook when the block exits.
with pd.ExcelWriter('xx.xlsx') as writer:
    df.to_excel(writer, index=False)
# 上一篇下一篇
#
# 猜你喜欢
#
# 热点阅读