用 page.on 方法，将网页 response 写入 CSV

2024-01-06  本文已影响0人  粥一样温柔
from playwright.sync_api import sync_playwright
import csv

# Record the URL and HTTP status of every response the page receives into
# responses.csv. The file is kept open for the whole browser session because
# the response handler writes rows while the page is loading.
with open('responses.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['url', 'status']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=False,
            args=['--start-maximized'],
            channel="chrome"
        )
        try:
            context = browser.new_context(no_viewport=True)
            page = context.new_page()

            def add_url_status(response):
                """Append one CSV row with the response's URL and status code."""
                writer.writerow({'url': response.url, 'status': response.status})

            # Register the handler before navigating so responses triggered
            # by goto() are captured; the function is passed directly, no
            # lambda wrapper is needed.
            page.on("response", add_url_status)
            page.goto("https://www.douban.com/")

            # Wait for the network to go idle before shutting down, so that
            # responses arriving after the initial load are still written.
            page.wait_for_load_state("networkidle")
        finally:
            # Close the browser even if navigation or the wait raised.
            browser.close()

上一篇 下一篇

猜你喜欢

热点阅读