使用 Puppeteer 实现 Node.js 爬虫
2018-03-27 本文已影响0人
猫久伴你入眠
image.png
image.png
// Puppeteer drives the browser used for scraping.
const puppeteer = require('puppeteer');
// Page to scrape: Douban's movie "explore" view. The URL fragment asks for
// type=movie, tag=%E7%83%AD%E9%97%A8 (URL-encoded "热门"), sort=recommend,
// page_limit=20 and page_start=0.
const url = 'https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=0';
/**
 * Pause helper: returns a promise that settles after a timer fires.
 *
 * @param {number} time - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
let sleep = (time) => new Promise((done) => setTimeout(done, time));
/**
 * Script entry point.
 *
 * Launches a browser through Puppeteer, opens `url`, clicks the ".more"
 * element to load additional entries, then collects id / title / cover URL /
 * rating for every anchor under ".list" and prints the resulting array.
 *
 * The browser is closed in a `finally` block so a failure in any step
 * (navigation timeout, missing selector, error inside the page) does not
 * leave the browser process running. Any error is logged and the process
 * exit code is set to 1.
 */
(async () => {
  console.log("开始");

  // Number of times the ".more" element is clicked to load extra entries.
  const LOAD_MORE_CLICKS = 1;

  // Start the browser.
  const browser = await puppeteer.launch({
    args: ['--no-sandbox'],
    dumpio: false
  });

  try {
    // Open a new page and navigate to the target URL.
    const page = await browser.newPage();
    await page.goto(url, {
      waitUntil: 'networkidle2'
    });

    // Wait 3 seconds before looking for the button.
    await sleep(3000);

    // Wait until the ".more" DOM element is present.
    await page.waitForSelector('.more');

    for (let i = 0; i < LOAD_MORE_CLICKS; i++) {
      await sleep(3000);
      await page.click('.more');
    }

    // This callback is executed inside the page, so it can only use what the
    // page itself provides. It relies on the page exposing jQuery as window.$.
    const result = await page.evaluate(() => {
      const $ = window.$;
      const links = [];

      $('.list a').each((idx, item) => {
        const $item = $(item);
        const $img = $item.find('img');
        const src = $img.attr('src');

        links.push({
          id: $item.find('.cover-wp').data('id'),
          title: $img.attr('alt'),
          // Swap the small cover variant for the large one. Anchors without
          // an image source yield null instead of throwing.
          imgUrl: typeof src === 'string' ? src.replace('s_ratio', 'l_ratio') : null,
          rate: Number($item.find('strong').text())
        });
      });

      return links;
    });

    console.log(result);
  } finally {
    // Always release the browser, even when a step above failed.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});