使用puppeteer爬取spa单页(vue/react)

2021-04-02  本文已影响0人  夜空中乄最亮的星

自己搭建 Puppeteer 抓取 SPA 单页折腾了快一天,最终卡在一个问题上:在命令行(无桌面)环境下始终无法启动 Chrome 浏览器,而在桌面环境(CentOS 7)下却可以正常运行。相比之下,还是直接使用 Docker 更简单:

拉取镜像

docker pull docker.io/alekzonder/puppeteer

编写脚本f.js

const puppeteer = require('puppeteer');

/**
 * Opens the Douyin share page in headless Chrome, waits until the video
 * player element has been rendered, and prints the rendered `<body>` HTML
 * to stdout.
 *
 * @returns {Promise<void>} Resolves after the HTML has been printed and the
 *   browser has been closed.
 * @throws Rejects with whatever error Puppeteer raises (launch failure,
 *   navigation error, selector timeout); the browser is still closed.
 */
async function getVideo() {
  const browser = await puppeteer.launch({
    ignoreHTTPSErrors: true,
    timeout: 30000,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.goto('https://v.douyin.com/eYc8Gcv/'); // Douyin video page

    // Wait for the client-side app to render the player before reading the DOM.
    const allResultsSelector = '.video-player video';
    await page.waitForSelector(allResultsSelector);

    // innerHTML is a plain string, so it can be returned directly from the
    // page context; no JSHandle has to be created or disposed.
    const bodyHtml = await page.evaluate(() => document.body.innerHTML);
    console.log(bodyHtml);
  } finally {
    // Always shut Chrome down, otherwise a failed navigation or a selector
    // timeout leaves the browser process running and Node never exits.
    await browser.close();
  }
}

getVideo().catch((err) => {
  // Report the failure explicitly instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});

测试

docker run -i --init --rm --cap-add=SYS_ADMIN    --name puppeteer-chrome docker.io/alekzonder/puppeteer     node -e "`cat f.js`"

//输出:


image.png

其他脚本

拦截响应:

const puppeteer = require('puppeteer');
// Loads a Kuaishou share page, logs every request to the GraphQL endpoint
// together with the body of its response, then saves a full-page screenshot
// to news.png. Any error is logged; the browser is always closed.
(async () => {
    const GRAPHQL_URL = 'https://video.kuaishou.com/graphql';

    try {
        const browser = await puppeteer.launch({
            ignoreHTTPSErrors: true,
            timeout: 30000,
            args: ['--no-sandbox', '--disable-setuid-sandbox'],
        });

        try {
            const page = await browser.newPage();
            await page.setRequestInterception(true);

            // With interception enabled every request stalls until it is
            // continued, aborted or responded to, so continue() must be
            // called for all requests, not only the one we care about.
            page.on('request', (request) => {
                if (request.url() === GRAPHQL_URL) {
                    console.log(request.url());
                    console.log("拦截到了这条url然后就该请求了");
                }
                request.continue().catch((e) => console.log(e));
            });

            // Registered exactly once. Attaching this listener from inside
            // the request handler would add one more listener per matching
            // request and print each response body several times.
            page.on('response', async (response) => {
                if (response.url() !== GRAPHQL_URL) {
                    return;
                }
                try {
                    const body = await response.text();
                    console.log(body);
                } catch (e) {
                    // The body can be unavailable (e.g. the page navigated
                    // away or the browser closed before it was read).
                    console.log(e);
                }
            });

            await page.goto('https://v.kuaishou.com/dYaBzk');
            await page.screenshot({ path: 'news.png', fullPage: true });
        } finally {
            // Close Chrome even when navigation or the screenshot fails.
            await browser.close();
        }
    } catch (e) {
        console.log(e);
    }
})();
上一篇下一篇

猜你喜欢

热点阅读