爬取百度的例子
2017-06-29 本文已影响0人
海藻web开发
var page = require('webpage').create(),
system = require('system'),
address='https://www.baidu.com/s?wd=';
var fs = require('fs');
if (system.args.length === 1) {
console.log('NO-KEY');
phantom.exit();
}
address += encodeURIComponent(system.args[1]);
var oldTime=new Date();
page.open(address, function(status) {
if (status !== 'success') {
console.log('失败了~');
} else {
var json=page.evaluate(function() {
var t=[];
var dom=document.getElementById('content_left').children;
console.log(dom.length)
for(var i=0;i<dom.length;i++){
var o=new Object();
o.title=dom[i].children[0].innerText;
o.info=dom[i].children[1].innerText;
o.link=dom[i].getElementsByTagName('a')[0].getAttribute('href');
var pic=dom[i].getElementsByTagName('img');
if(pic.length>0){
o.pic=pic[0].getAttribute('src');
}else{
o.pic=null;
}
t.push(o);
}
var oo=new Object();
oo.code=0;
oo.msg='抓取成功~';
oo.word=document.getElementById('kw').value;
oo.dataList=t;
return oo;
});
json.time=new Date-oldTime;
fs.write('f:/2222.txt', JSON.stringify(json), 'w');
}
phantom.exit();
});