汽车之家车型参数配置数据爬取
import scrapy
import json
import re
import execjs
import demjson
关键代码:
def parse_config(self, response):
    """Rebuild the CSS that the page's inline scripts generate and print it.

    Every inline ``<script>(function(...</script>`` body found in
    ``response.text`` is concatenated after a small jsdom bootstrap that
    provides global ``window`` and ``document`` objects. A ``getcss``
    helper is appended which walks ``document.styleSheets`` and
    concatenates the ``cssText`` of everything it finds. The combined
    source is compiled through execjs and ``getcss`` is called.

    Args:
        response: object exposing the page HTML as ``response.text``.

    Returns:
        None. The collected CSS text is written to stdout.
    """
    # The capture group starts right after "(function(", so that prefix is
    # put back in front of every captured body below.
    script_bodies = re.findall(
        r'<script>\(function\((.*?)</script>', response.text, re.M | re.S
    )
    page_js = "".join("(function(" + body for body in script_bodies)

    # Minimal DOM so the page scripts can touch window/document under Node.
    jsdom_bootstrap = 'var jsdom = require("jsdom");var { JSDOM } = jsdom; var dom = new JSDOM(); window = dom.window; document = window.document;'

    # JS helper that gathers cssText from every rule reachable through
    # document.styleSheets. Kept byte-for-byte identical to the original.
    getcss_js = (
        'function getcss() {\n'
        's = "";\n'
        'for(x in document.styleSheets){\n'
        'for (j in document.styleSheets[x]){\n'
        '//if(document.styleSheets[x][j] instanceof CSSRuleList){\n'
        'xx = document.styleSheets[x][j];\n'
        'for(ss in xx){\n'
        's += xx[ss].cssText;\n'
        '}\n'
        '//}\n'
        '}\n'
        '}\n'
        'return s;\n'
        '}'
    )

    # NOTE(review): this executes JavaScript taken verbatim from the remote
    # page inside the local JS runtime; only use it against trusted targets.
    js_runtime = execjs.compile(
        jsdom_bootstrap + page_js + getcss_js, cwd='./node_modules'
    )
    cssstr = js_runtime.call("getcss")
    print(cssstr)
解决字体 CSS 替换问题:
需要安装 Node.js;
在当前目录下安装 jsdom 模块(npm install jsdom),用来运行 JS 代码,获取生成的 CSS 代码。