cheerio爬取数据并通过excel-export将结果保存到Excel文件

2019-07-23  本文已影响0人  b59a2ae26f20
const request = require('request');
const cheerio = require('cheerio')
const fs = require('fs');
const { wf, excel, l } = require('./utils')

/**
 * Build one column descriptor per `th` cell found under `table`.
 *
 * @param {object} table - Selection exposing `find`, `eq` and `text`
 *   (callers in this file pass a cheerio selection).
 * @returns {Array<{caption: string, type: string, width: string}>}
 *   Descriptors in document order. `caption` is the header text with all
 *   whitespace removed; `type` and `width` are fixed literals handed on to
 *   the `excel` helper (their exact meaning is defined there, not here).
 */
const generateHeaderByTable = (table) => {
    const headerCells = table.find('th');

    // Trim first, then drop every remaining whitespace run inside the label.
    const toColumn = (_, index) => ({
        caption: headerCells.eq(index).text().trim().replace(/\s*/g, ''),
        type: 'string',
        width: '30',
    });

    return Array.from({ length: headerCells.length }, toColumn);
};

/**
 * Collect the text of every `td` cell found under `table` as one flat row.
 *
 * @param {object} table - Selection exposing `find`, `eq` and `text`
 *   (callers in this file pass a cheerio selection).
 * @returns {string[]} Cell texts in document order, trimmed and with all
 *   whitespace removed.
 */
const generateRowByTable = (table) => {
    const dataCells = table.find('td');
    const cleanText = (cell) => cell.text().trim().replace(/\s*/g, '');

    return Array.from(
        { length: dataCells.length },
        (_, index) => cleanText(dataCells.eq(index)),
    );
};

/**
 * Read the saved HTML in ./d1.txt, convert its `.details` elements into
 * spreadsheet rows and hand them to the `excel` helper for ./index.xlsx.
 *
 * `.details` elements are consumed in pairs: elements i and i+1 together form
 * one row, and the header is taken from the first pair only.
 *
 * The read is asynchronous; nothing is returned. If the file cannot be read
 * the error is logged and no workbook is produced.
 */
const getExcelData = () => {
    fs.readFile('./d1.txt', 'utf-8', (err, data) => {
        if (err) {
            // Without this guard `data` is undefined and cheerio would be fed garbage.
            console.error('Failed to read ./d1.txt:', err);
            return;
        }

        // Declared locally: the bare `$ = ...` form leaks a global and throws in strict mode.
        const $ = cheerio.load(data);
        const details = $('.details');

        const titleArr = [
            ...generateHeaderByTable(details.eq(0)),
            ...generateHeaderByTable(details.eq(1)),
        ];

        const rowArr = [];
        for (let i = 0; i < details.length; i += 2) {
            // With an odd count the last `eq(i + 1)` is an empty selection and adds no cells.
            rowArr.push([
                ...generateRowByTable(details.eq(i)),
                ...generateRowByTable(details.eq(i + 1)),
            ]);
        }

        excel('./index.xlsx', titleArr, rowArr);
    });
};

/**
 * Read the saved HTML in ./d1.txt and write a plain-text dump of its table
 * rows to ./d2.txt through the `wf` helper.
 *
 * For every `tr`, the first `th`/`td` pair is emitted as "label value",
 * followed by a line holding a single space. When the row has more than one
 * `th`, the second pair is emitted the same way. A row whose first label is
 * '单位名称' is preceded by an extra newline, separating one record from the
 * next.
 *
 * The read is asynchronous; nothing is returned. If the file cannot be read
 * the error is logged and nothing is written.
 */
function loadFile() {
    fs.readFile('./d1.txt', 'utf-8', (err, data) => {
        if (err) {
            console.error('Failed to read ./d1.txt:', err);
            return;
        }

        // Declared locally: the bare `$ = ...` form leaks a global and throws in strict mode.
        const $ = cheerio.load(data);
        const trs = $('tr');
        // Values have all whitespace stripped; labels are only trimmed.
        const cellValue = (cell) => cell.text().trim().replace(/\s*/g, '');

        let str = '';
        // Index is named `i` so it does not shadow the `l` helper imported from ./utils.
        for (let i = 0; i < trs.length; i++) {
            const row = $(trs[i]);
            const ths = row.find('th');
            const tds = row.find('td');
            const firstLabel = ths.eq(0).text().trim();

            if (firstLabel === '单位名称') {
                str += '\n';
            }
            str += `${firstLabel} ${cellValue(tds.eq(0))}`;
            str += '\n \n';

            if (ths.length > 1) {
                str += `${ths.eq(1).text().trim()} ${cellValue(tds.eq(1))}`;
                str += '\n \n';
            }
        }

        wf('./d2.txt', str);
    });
}
// Alternative entry point, currently disabled: dumps the table rows of ./d1.txt as text into ./d2.txt.
// loadFile();
// Active entry point: builds ./index.xlsx from the `.details` elements in ./d1.txt.
getExcelData()
// NOTE(review): getCompanyInfo is not defined in the visible code — presumably an earlier
// page-fetching step; confirm it exists before re-enabling this call.
//getCompanyInfo('http://101.227.181.106/jsp/view/info.jsp?id=2092')
上一篇 | 下一篇

猜你喜欢

热点阅读