Node.js 数据抓取

2017-05-08  本文已影响1066人  9ac64e1f7a99

工具

项目目录结构

项目目录结构
package.json
babel

watcher.js

const chokidar = require('chokidar');
const shell = require('shelljs');


// File watcher rooted at the current directory ('.'). Every entry in
// `ignored` is a regular expression describing paths that are excluded.
// NOTE(review): the 'change' handler in this script re-runs `npm start`; if
// that command writes files inside the watched tree, each write would
// presumably raise another 'change' and re-run it again — confirm, and add
// such output files to `ignored` if so.
const watcher = chokidar.watch('.', {
  ignored: [
    /[\/\\]\./,       // a path separator immediately followed by a dot (dotfiles/dot-directories)
    /node_modules/,
    /vscode/,
    /babelrc/,
    /watcher\.js/,    // dot escaped: match the literal file name, not "watcher<any char>js"
    /package\.json/   // dot escaped for the same reason
  ],
  persistent: true
});

// Logger shorthand: `console.log` pre-bound to `console`.
const log = console.log.bind(console);

// Clears the terminal and runs the project's `npm start` script via shelljs.
function modify() {
  shell.exec('clear && npm start');
}

// Builds a listener that logs "<subject> <path> <outcome>" for a path event.
const announce = (subject, outcome) => (changedPath) => {
  log(subject, changedPath, outcome);
};

// Additions and removals are only reported; a content change re-runs the project.
watcher.on('add', announce('File', 'has been added'));
watcher.on('addDir', announce('Directory', 'has been added'));
watcher.on('change', () => {
  modify();
});
watcher.on('unlink', announce('File', 'has been removed'));
watcher.on('unlinkDir', announce('Directory', 'has been removed'));
watcher.on('error', (error) => {
  log('Error happened', error);
});
watcher.on('ready', () => {
  log('Initial scan complete. Ready for changes.');
});
// For low-level debugging a 'raw' listener can be attached as well:
// watcher.on('raw', (event, path, details) => { log('Raw event info:', event, path, details); });


// Run once immediately at startup, before any change has been observed.
modify();

getPage.js

const http = require("http");

// Utility function that downloads a URL and invokes
// callback with the data.
/**
 * Downloads `url` with `http.get` and passes the response body to `callback`.
 *
 * The body is collected as raw chunks and decoded as UTF-8 once, after the
 * response has ended, so a multi-byte character that is split across two
 * chunks is not corrupted. The HTTP status code is not inspected: whatever
 * body the server sends is handed to the callback.
 *
 * @param {string|URL|Object} url - Target forwarded unchanged to `http.get`.
 * @param {function(?string): void} callback - Invoked at most once, with the
 *   body text on completion or with `null` if the request or the response
 *   stream emits 'error'.
 */
function download(url, callback) {
  let settled = false;

  // Guarantees the caller hears back only once, even if 'error' follows 'end'.
  const finish = (result) => {
    if (settled) {
      return;
    }
    settled = true;
    callback(result);
  };

  http.get(url, (res) => {
    const chunks = [];
    res.on('data', (chunk) => {
      chunks.push(chunk);
    });
    res.on('end', () => {
      finish(Buffer.concat(chunks).toString('utf8'));
    });
    // Without a listener, an 'error' on the response stream would be thrown.
    res.on('error', () => {
      finish(null);
    });
  }).on('error', () => {
    finish(null);
  });
}

export default download;

index.js 数据抓取

// 抓取虾米主页的新碟首发
const cheerio = require("cheerio");
const fs = require('fs');
const path = require('path');
import getPage from './util/getPage';

// Address of the page to scrape; also the base for resolving album links.
// Named BASE_URL (not URL) so the global `URL` class stays usable below.
const BASE_URL = 'http://www.xiami.com/';

// Fetch the page, extract one record per child of `#albums .content_block`,
// and write the records as JSON to `test.json` next to this script.
getPage(BASE_URL, (data) => {
  const albums = [];
  if (data) {
    const $ = cheerio.load(data);
    // `.children()` selects the child elements; `.each()` is the iterator.
    $('#albums').find('.content_block').children().each((i, e) => {
      const $image = $(e).find('.image');
      const $info = $image.next();
      const href = $image.children('a').attr('href');
      albums.push({
        img: $image.children('img').attr('src'),
        // Resolve against the base so relative, root-relative and absolute
        // hrefs all yield a well-formed link; a missing href leaves the key
        // out of the JSON instead of producing ".../undefined".
        url: href ? new URL(href, BASE_URL).href : undefined,
        name: $info.find('a').text()
      });
    });
  } else {
    // getPage supplied no page content; an empty list is still written below.
    console.error('No page data received from', BASE_URL);
  }
  // Persist the scraped records.
  const outputFile = path.resolve(__dirname, 'test.json');
  fs.writeFile(outputFile, JSON.stringify(albums), (err) => {
    if (err) {
      console.error('Failed to write', outputFile, err);
      return;
    }
    console.log('Wrote', albums.length, 'records to', outputFile);
  });
});
上一篇下一篇

猜你喜欢

热点阅读