cheerio 爬取数据并通过 excel-export 将结果保存到 Excel 文件
2019-07-23 本文已影响0人
b59a2ae26f20
const request = require('request');
const cheerio = require('cheerio')
const fs = require('fs');
const { wf, excel, l } = require('./utils')
/**
 * Build one column descriptor per `th` cell found in the given table.
 *
 * @param {object} table - Selection exposing `find`, as a Cheerio object does.
 * @returns {Array<{caption: string, type: string, width: string}>} Descriptors
 *   in the order the `th` cells are returned by `find`.
 */
const generateHeaderByTable = (table) => {
  const headerCells = table.find('th');
  return Array.from({ length: headerCells.length }, (_, index) => {
    // Trim the cell text, then drop every remaining whitespace character.
    const caption = headerCells.eq(index).text().trim().replace(/\s*/g, '');
    return { caption, type: 'string', width: '30' };
  });
};
/**
 * Collect the whitespace-free text of every `td` cell found in the given table.
 *
 * @param {object} table - Selection exposing `find`, as a Cheerio object does.
 * @returns {string[]} Cell texts in the order the `td` cells are returned by `find`.
 */
const generateRowByTable = (table) => {
  const dataCells = table.find('td');
  // Trim each cell, then remove any whitespace left inside the text.
  const readCell = (_, index) => dataCells.eq(index).text().trim().replace(/\s*/g, '');
  return Array.from({ length: dataCells.length }, readCell);
};
/**
 * Read the saved HTML in ./d1.txt, turn its `.details` elements into a header
 * list and data rows, and hand them to `excel` with the target ./index.xlsx.
 *
 * The `.details` elements are consumed in pairs: elements 0 and 1 supply the
 * headers, and every pair (i, i + 1) is flattened into a single row.
 *
 * The read is asynchronous; nothing is returned. If the file cannot be read,
 * the error is logged and no output is produced.
 *
 * @returns {void}
 */
const getExcelData = () => {
  fs.readFile('./d1.txt', 'utf-8', (err, data) => {
    if (err) {
      // Previously the error was ignored and `undefined` was parsed as HTML.
      console.error('Failed to read ./d1.txt:', err);
      return;
    }

    // Declared locally: the bare `$ = ...` assignment leaked a global and
    // throws a ReferenceError in strict mode / ES modules.
    const $ = cheerio.load(data);
    const details = $('.details');

    const titleArr = [
      ...generateHeaderByTable(details.eq(0)),
      ...generateHeaderByTable(details.eq(1)),
    ];

    const rowArr = [];
    // Step by two: each row is built from a pair of adjacent `.details` elements.
    for (let i = 0; i < details.length; i += 2) {
      rowArr.push([
        ...generateRowByTable(details.eq(i)),
        ...generateRowByTable(details.eq(i + 1)),
      ]);
    }

    excel('./index.xlsx', titleArr, rowArr);
  });
};
/**
 * Read the saved HTML in ./d1.txt and flatten every `tr` into "label value"
 * text, which is then passed to `wf` with the target ./d2.txt.
 *
 * For each row the first `th`/`td` pair is always emitted; the second pair is
 * emitted only when the row has more than one `th`. Each pair is followed by
 * a newline, a single space and another newline. A row whose first label is
 * '单位名称' is preceded by an extra newline.
 *
 * The read is asynchronous; nothing is returned. If the file cannot be read,
 * the error is logged and no output is produced.
 *
 * @returns {void}
 */
function loadFile() {
  fs.readFile('./d1.txt', 'utf-8', (err, data) => {
    if (err) {
      // Previously the error was ignored and `undefined` was parsed as HTML.
      console.error('Failed to read ./d1.txt:', err);
      return;
    }

    // Declared locally: the bare `$ = ...` assignment leaked a global and
    // throws a ReferenceError in strict mode / ES modules.
    const $ = cheerio.load(data);
    const trs = $('tr');
    const pairSeparator = '\n' + ' ' + '\n';

    // Label text is only trimmed; value text additionally loses all inner whitespace.
    const labelOf = (cell) => cell.text().trim();
    const valueOf = (cell) => cell.text().trim().replace(/\s*/g, '');

    let str = '';
    // Index named `i` so it no longer shadows the `l` imported from ./utils.
    for (let i = 0; i < trs.length; i++) {
      const row = $(trs[i]);
      const ths = row.find('th');
      const tds = row.find('td');
      const firstLabel = labelOf(ths.eq(0));

      if (firstLabel === '单位名称') {
        str += '\n';
      }

      str += `${firstLabel} ${valueOf(tds.eq(0))}`;
      str += pairSeparator;

      if (ths.length > 1) {
        str += `${labelOf(ths.eq(1))} ${valueOf(tds.eq(1))}`;
        str += pairSeparator;
      }
    }

    wf('./d2.txt', str);
  });
}
// Script entry point. Exactly one of the calls below is meant to be active.
// Disabled alternative: flatten the rows of ./d1.txt into text handed to wf('./d2.txt', ...).
// loadFile();
// Active: build the header/row arrays from ./d1.txt and pass them to excel('./index.xlsx', ...).
getExcelData()
// NOTE(review): getCompanyInfo is not defined in the visible part of this file —
// presumably an earlier fetch step that produced ./d1.txt; confirm before re-enabling.
//getCompanyInfo('http://101.227.181.106/jsp/view/info.jsp?id=2092')