nodejs随记

chrome extensions cdp(通过debugger)

2020-09-25  本文已影响0人  LCSan

在爬虫开发过程中,踩过无数的坑,与目标方斗智斗勇。总结来看终归是成本的博弈,不管开发成本,时间成本,空间成本,抑或是其他。方案万千,权衡后低成本拿下才是王道,当成本超过预期也就放弃挣扎了。

在某些安全至上的行业,时间、空间成本通常只能往后靠。优先安全的情况下,完全模拟用户行为几乎是最为有效的。想想那些各种加密,接口鉴权,请求策略…,为了最后几根头发妥协吧!

本文是cdp第三种使用方式,目的都是为了嗅探接口数据(为啥嗅探?你去看看强鉴权的网站)。某些变态的网站,selenium cdp、chrome带启动参数cdp都会检测,导致登录或鉴权错误。走投无路的情况下,采用了chrome扩展,通过debugger来实现。

浏览器启动

为避免debugger弹出调试弹框,启动项增加--silent-debugger-extension-api配置,如下:

# 跨域、debugger api、指定嗅探扩展目录
"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe" --disable-web-security --user-data-dir="d:\aaa" --silent-debugger-extension-api --load-extension="d:\嗅探"

manifest.json

{
   "background": {
      "persistent": true,
      "scripts": [ "background.js" ]
   },
   "browser_action": {
      "default_icon": "icon_38.png"
   },
   "content_security_policy": "script-src 'self' 'unsafe-eval'; object-src 'self'",
   "description": "网页嗅探",
   "icons": {
      "128": "icon.png",
      "16": "icon.png",
      "48": "icon.png"
   },
   "manifest_version": 2,
   "name": "网页嗅探",
   "permissions": [ "debugger", "storage", "notifications", "tts", "webRequest", "webRequestBlocking", "http://*/*","https://*/*", "tabs", "contextMenus", "webNavigation", "clipboardWrite", "clipboardRead" ],
   "short_name": "网页嗅探",
   "version": "2.7"
}

background.js

本来是准备用Native Message的,但是太复杂了。索性用nodejs简单搭了个web服务用着,脚手架也方便。

console.log("开启嗅探");

// Repeating timer: call initRule every 1500 ms to request the sniffing rules.
// The timer handle is stored in the global `timename` so that initRule's
// response callback can cancel the polling with clearInterval(timename).
var timename = setInterval(initRule, 1500);

/**
 * Send an asynchronous request with XMLHttpRequest (plain XHR, not JSONP).
 *
 * The "Content-Type: application/json" header is always set.
 *
 * @param {string} [url] Target address; a falsy value falls back to
 *     "http://127.0.0.1:8080/".
 * @param {string} [method] HTTP method; a falsy value falls back to "POST".
 * @param {*} [data] Request body. Falsy values are sent as an empty string,
 *     objects are serialised with JSON.stringify, anything else is sent as is.
 * @param {function} [callback] Installed as the request's `onload` handler
 *     (so `this` is the request object). When omitted, the response text is
 *     written to the console.
 */
function xhr(url, method, data, callback) {
    var target = url || "http://127.0.0.1:8080/";
    var verb = method || "POST";

    // Work out the body first: nothing -> "", object -> JSON text, other -> unchanged.
    var payload = "";
    if (data) {
        payload = typeof data == "object" ? JSON.stringify(data) : data;
    }

    var request = new XMLHttpRequest();
    request.open(verb, target, true);
    request.setRequestHeader("Content-Type", "application/json");

    if (callback) {
        request.onload = callback;
    } else {
        // Default handler: just dump whatever the server answered.
        request.onload = function () {
            console.log(this.responseText);
        };
    }

    request.send(payload);
}

/**
 * Request the sniffing rules from the local rule service and, once a usable
 * answer arrives, stop the polling timer and hand the rules to handleRule.
 *
 * This function is invoked repeatedly by the global interval `timename`.
 * Polling is only cancelled after a response with a 2xx status and a body
 * that parses to a JSON object has been received; an HTTP error or a
 * malformed body leaves the timer running so the next tick retries.
 * Because a slow response can overlap with later ticks, the first good
 * response sets `initRule.loaded` and any later response is ignored, so
 * handleRule runs at most once.
 */
function initRule() {
    var url = "http://127.0.0.1:8080/rule";
    var callback = function () {
        // A previous (overlapping) poll already initialised the listeners.
        if (initRule.loaded) {
            return;
        }
        // onload also fires for HTTP error responses; keep polling in that case.
        if (this.status < 200 || this.status >= 300) {
            console.log("rule request failed with status " + this.status + ", will retry");
            return;
        }
        var rule;
        try {
            rule = JSON.parse(this.responseText);
        } catch (e) {
            console.log("rule response is not valid JSON, will retry: " + e.message);
            return;
        }
        if (!rule || typeof rule !== "object") {
            console.log("rule response is not a JSON object, will retry");
            return;
        }
        initRule.loaded = true;
        // Stop the polling timer now that usable rules are available.
        clearInterval(timename);
        console.log(rule);
        handleRule(rule);
    };
    xhr(url, "GET", "", callback);
}

/**
 * Turn a rule object into the regular expressions used for sniffing and
 * pass them to initListener.
 *
 * - rule.tabDomain: array of domain strings. The dots are escaped and the
 *   entries are joined into an alternation. From it two expressions are
 *   built: one matching http(s) URLs that start with a listed domain and
 *   one matching http(s) URLs that do not.
 *   NOTE: the alternation is not anchored at its end, so an entry "a.com"
 *   also matches a host that merely begins with "a.com".
 * - rule.fetchUrlFilters: array of regular-expression sources; each one is
 *   compiled with `new RegExp`.
 *
 * The rule object passed in is left unmodified.
 *
 * @param {{tabDomain: string[], fetchUrlFilters: string[]}} rule
 * @throws {TypeError} when either field is not an array.
 * @throws {SyntaxError} when a fetchUrlFilters entry is not a valid pattern.
 */
function handleRule(rule) {
    var domains = rule["tabDomain"];
    var filters = rule["fetchUrlFilters"];
    if (!Array.isArray(domains) || !Array.isArray(filters)) {
        throw new TypeError("rule must contain the arrays tabDomain and fetchUrlFilters");
    }

    // Tab sniffing rule: which domains' pages are sniffed.
    // Build new strings instead of overwriting the entries of rule.tabDomain.
    var domain = domains.map(function (name) {
        return name.replace(/\./g, "\\.");
    }).join("|");
    var w_Domain = new RegExp("^https?:\\/\\/(" + domain + ")");
    var b_Domain = new RegExp("^https?:\\/\\/(?!" + domain + ")");

    // Filters for the request URLs to capture, compiled into a fresh array.
    var fu_Filters = filters.map(function (pattern) {
        return new RegExp(pattern);
    });

    initListener(w_Domain, b_Domain, fu_Filters);
}

/**
 * Decide whether a sniffed URL should be captured.
 *
 * @param {RegExp[]} fu_Filters Compiled URL filters; a missing list is
 *     treated as empty.
 * @param {string} url The request URL to check.
 * @returns {boolean} true when at least one filter matches the URL.
 */
function fetchUrlFilter(fu_Filters, url) {
    if (!fu_Filters) {
        return false;
    }
    // Index loop with a declared counter: no implicit global, and inherited
    // enumerable array properties are not visited.
    for (var i = 0; i < fu_Filters.length; i++) {
        if (fu_Filters[i].test(url)) {
            return true;
        }
    }
    return false;
}

/**
 * Register the tab and debugger listeners that perform the sniffing.
 *
 * When a tab starts loading a URL matched by w_Domain, the debugger is
 * attached to it and the Network domain is enabled. When a tracked tab
 * starts loading a URL matched by b_Domain, the debugger is detached.
 * Requests and WebSocket connections whose URL passes fetchUrlFilter are
 * recorded per tab; finished responses and received WebSocket frames are
 * POSTed through xhr.
 *
 * A single chrome.debugger.onEvent listener is registered for all tabs and
 * dispatches on `source.tabId`, so every event is processed exactly once
 * and only against the cache of the tab it came from.
 *
 * @param {RegExp} w_Domain Matches tab URLs that should be sniffed.
 * @param {RegExp} b_Domain Matches tab URLs that should not be sniffed.
 * @param {RegExp[]} fu_Filters Filters passed on to fetchUrlFilter.
 */
function initListener(w_Domain, b_Domain, fu_Filters) {
    // Per-tab cache keyed by the tab id as a string:
    // { fetch_urls: { requestId: record }, webSocket_urls: { requestId: url } }
    var tab_cache = {};

    // Forget everything recorded for a tab.
    function dropTab(tabId) {
        delete tab_cache[tabId + ""];
    }

    // Fetch the body of a finished request and POST the assembled record.
    function reportResponse(source, method, entry, requestId) {
        // Keep a direct reference: the tab's cache may be dropped before the
        // asynchronous callbacks below run.
        var record = entry["fetch_urls"][requestId];
        console.log(method + "\t" + record.request.url);
        chrome.debugger.sendCommand(source, "Network.getResponseBody", {
            "requestId": requestId
        }, function (response) {
            if (chrome.runtime.lastError) {
                // The record is still reported, just without a response body.
                console.log(source.tabId + ":tab Network.getResponseBody failed: " +
                    chrome.runtime.lastError.message);
            }
            record["tabId"] = source.tabId;
            record["response"] = response;
            // Send the sniffed result, then drop the record to keep the cache small.
            xhr(null, null, record, function () {
                console.log(this.responseText);
                delete entry["fetch_urls"][requestId];
            });
        });
    }

    // Single handler for the debugger events of every attached tab.
    function onDebuggerEvent(source, method, params) {
        var entry = tab_cache[source.tabId + ""];
        if (!entry) {
            // Event from a target that is not (or no longer) tracked.
            return;
        }
        console.log(source.tabId + ":tab debugger event fetch");
        var requestId = params ? params.requestId : undefined;
        var fetch_urls = entry["fetch_urls"];
        var webSocket_urls = entry["webSocket_urls"];

        switch (method) {
            case "Network.requestWillBeSent":
                if (fetchUrlFilter(fu_Filters, params.request.url)) {
                    fetch_urls[requestId] = {};
                    fetch_urls[requestId]["request"] = params["request"];
                }
                break;
            case "Network.responseReceived":
                if (fetch_urls[requestId]) {
                    fetch_urls[requestId]["ResponseHeaders"] = params["response"]["headers"];
                }
                break;
            case "Network.loadingFinished":
                if (fetch_urls[requestId]) {
                    reportResponse(source, method, entry, requestId);
                }
                break;
            case "Network.loadingFailed":
                // A failed request never reaches loadingFinished; drop its record.
                delete fetch_urls[requestId];
                break;
            case "Network.webSocketCreated":
                if (fetchUrlFilter(fu_Filters, params.url)) {
                    webSocket_urls[requestId] = params.url;
                }
                break;
            case "Network.webSocketFrameReceived":
                if (webSocket_urls[requestId]) {
                    var data = params;
                    data["url"] = webSocket_urls[requestId];
                    data["tabId"] = source.tabId;
                    // Send the sniffed frame. A WebSocket is long-lived, so its
                    // cache entry is kept until the socket is closed.
                    xhr(null, null, data);
                }
                break;
            case "Network.webSocketClosed":
                delete webSocket_urls[requestId];
                break;
            default:
                break;
        }
    }
    chrome.debugger.onEvent.addListener(onDebuggerEvent);

    // The browser ended the debugging session itself (e.g. DevTools was opened
    // for the tab): drop the cache so the tab can be attached again later.
    chrome.debugger.onDetach.addListener(function (source, reason) {
        if (tab_cache[source.tabId + ""]) {
            console.log(source.tabId + ":tab debugger detached by browser: " + reason);
            dropTab(source.tabId);
        }
    });

    // A tab reported an update.
    chrome.tabs.onUpdated.addListener(function (id, info, tab) {
        console.log(id + ":tab 更新\t" + JSON.stringify(info));
        if (info.status != "loading") {
            return;
        }
        var tracked = tab_cache[id + ""];
        if (tracked && b_Domain.test(tab["url"])) {
            // The tab navigated away from the sniffed domains: release the debugger.
            chrome.debugger.detach({
                "tabId": id
            }, function () {
                if (chrome.runtime.lastError) {
                    console.log(id + ":tab debugger detach failed: " +
                        chrome.runtime.lastError.message);
                } else {
                    console.log(id + ":tab debugger解绑");
                }
                dropTab(id);
            });
        } else if (!tracked && w_Domain.test(tab["url"])) {
            // The tab entered a sniffed domain: start tracking and attach.
            tab_cache[id + ""] = {
                "fetch_urls": {},
                "webSocket_urls": {}
            };
            chrome.debugger.attach({
                "tabId": id
            }, "1.0", function () {
                if (chrome.runtime.lastError) {
                    console.log(id + ":tab debugger attach failed: " +
                        chrome.runtime.lastError.message);
                    // Not attached: forget the tab so a later load can retry.
                    dropTab(id);
                    return;
                }
                console.log(id + ":tab debugger绑定");
                chrome.debugger.sendCommand({
                    "tabId": id
                }, "Network.enable", {}, function () {
                    if (chrome.runtime.lastError) {
                        console.log(id + ":tab Network.enable failed: " +
                            chrome.runtime.lastError.message);
                        return;
                    }
                    console.log(id + ":tab Network.enable");
                });
            });
        }
    });

    // Clean up the cache when a tab is closed.
    chrome.tabs.onRemoved.addListener(function (id) {
        console.log(id + ":tab 关闭");
        if (tab_cache[id + ""]) {
            dropTab(id);
        }
    });
}

rule.json 嗅探规则

每当浏览器启动时,扩展会每1.5秒请求一次rule规则,直到请求到为止。

{
    "tabDomain": [
        "xxxx.xxxx.com",
        "xxxx.xxxx.com"
    ],
    "fetchUrlFilters": [
        "\\.json"
    ]
}

rule.json说明:
1、浏览器嗅探规则,扩展在启动后每隔1.5秒获取一次,请求成功则停止获取。如果想新规则生效,则重启浏览器。
2、浏览器启动参数附带--silent-debugger-extension-api,解决浏览器弹调试框问题。
3、规则:
tabDomain:要嗅探的域名,这里必须是完整的域名。对应浏览器的地址栏的url,进行域名过滤。tab刷新会实时监控,不用担心重复会丢失嗅探。
fetchUrlFilters:要抓取的目标请求地址,正则表达式,用来过滤出要抓取的内容。

最后

嗅探的结果会post到web服务,至于怎么处理嗅探结果,就具体分析了。

上一篇 下一篇

猜你喜欢

热点阅读