在子进程中启动爬虫脚本
2018-08-22 本文已影响0人
noyanse
cp.js
const cp = require('child_process')
const { resolve } = require('path')
// Forks the crawler script as a child process and prints what it reports.
// - 'message': the child is expected to send an object with a `result` field.
// - 'error' / 'exit': whichever fires first logs the outcome; the `invoked`
//   flag keeps the second one from reporting again.
// NOTE(review): the path resolves to './crawler1' next to this file — confirm
// it matches the actual crawler file name.
;(() => {
  const script = resolve(__dirname, './crawler1')
  const child = cp.fork(script, [])
  let invoked = false

  child.on('error', err => {
    if (invoked) return
    invoked = true
    console.log(err)
  })

  // `code` is null when the child was terminated by a signal; in that case
  // `signal` carries the signal name, so report it instead of "code null".
  child.on('exit', (code, signal) => {
    if (invoked) return
    invoked = true
    let err = null
    if (code !== 0) {
      err = new Error(signal ? `exit signal ${signal}` : `exit code ${code}`)
    }
    // Logs null on a clean exit, matching the existing output.
    console.log(err)
  })

  child.on('message', data => {
    const result = data.result
    console.log(result)
  })
})()
crawler.js
// Address the crawler navigates to. The hash fragment carries the listing
// filters as written: sort=R, range=6,10 and an empty tags value.
const url = 'https://movie.douban.com/tag/#/?sort=R&range=6,10&tags='
const puppeteer = require('puppeteer')
/**
 * Pause helper for async code: the returned promise resolves (with no value)
 * when a setTimeout of `time` milliseconds fires.
 * @param {number} time - delay handed to setTimeout, in milliseconds
 * @returns {Promise<undefined>}
 */
const sleep = (time) => {
  return new Promise((wake) => setTimeout(wake, time))
}
// Crawler entry point: opens `url` in headless Chromium, clicks the '.more'
// button once, scrapes every '.list-wp a' entry into
// { doubanId, title, rate, poster } and hands the list to the parent process
// over the IPC channel.
// Exit status: 0 after the result has been delivered, 1 if any step throws.
;(async () => {
  console.log('Start visit the target page')
  const browser = await puppeteer.launch({
    args: ['--no-sandbox']
  })

  let result
  try {
    const page = await browser.newPage()
    await page.goto(url, {
      waitUntil: 'networkidle2'
    })
    await sleep(3000)
    await page.waitForSelector('.more')
    for (let i = 0; i < 1; i++) {
      await sleep(3000)
      await page.click('.more')
    }

    // This callback runs inside the page, so it can only use what the page
    // provides (here: the page's own window.$).
    result = await page.evaluate(() => {
      const $ = window.$
      const items = $('.list-wp a')
      const links = []
      if (items.length >= 1) {
        items.each((index, item) => {
          const it = $(item)
          const doubanId = it.find('div').data('id')
          const title = it.find('.title').text()
          const rate = Number(it.find('.rate').text())
          // An entry without an image src yields '' instead of throwing
          // and aborting the whole scrape.
          const src = it.find('img').attr('src') || ''
          const poster = src.replace('s_ratio', 'l_ratio')
          links.push({
            doubanId,
            title,
            rate,
            poster
          })
        })
      }
      return links
    })
  } finally {
    // Always shut the browser down, even when navigation or scraping fails,
    // so no Chromium process is left behind.
    await browser.close()
  }

  if (typeof process.send === 'function') {
    // Exit only from the send callback so the message is handed to the IPC
    // channel before the process goes away.
    process.send({ result }, () => process.exit(0))
  } else {
    // Not started via fork (no IPC channel): print the result instead.
    console.log(result)
    process.exit(0)
  }
})().catch(err => {
  // Surface the failure and exit non-zero so the parent's 'exit' handler
  // sees an error code rather than an unhandled rejection.
  console.error(err)
  process.exit(1)
})