py爬虫

python 百度翻译js逆向

2022-02-09  本文已影响0人  a十二_4765

百度翻译

输入中文并点击翻译后会显示英文结果，按 F12 打开开发者工具查看请求的参数。

关键参数是token跟sign

token在当前页面查看源代码会看到

所以只有 sign 是加密生成的，需要逆向的就是这个参数。

第二步

经过分析发现，sign 是在 success 里面生成的，点击进入 success 所在的那个 js 文件。

第三步

全局搜索 sign，找到后打断点进行调试。这个 L 就是我们需要的方法，跳转到这个 L 的定义处。

这个 e 就是我们需要的 L，把这个 js 方法复制出来，放到鬼鬼 js 调试工具里运行。

它提示 i 未定义。经过多次调试发现 i 的值（原脚本中取自 window.gtk）等于 320305.131321201，于是直接定义变量 var i = "320305.131321201"

最后发现这个js就是我们所需要的js

最后封装下js就可以了 baidu.js

/**
 * Return a fresh array holding the elements of `r`.
 *
 * Real arrays are copied index by index; anything else is handed to
 * `Array.from`.
 *
 * @param {*} r - An array, or any value `Array.from` accepts.
 * @returns {Array} A new array with the same elements.
 */
function a(r) {
    // Non-arrays (strings, iterables, array-likes) are delegated to Array.from.
    if (!Array.isArray(r)) {
        return Array.from(r);
    }

    var copy = new Array(r.length);
    for (var idx = 0; idx < r.length; idx += 1) {
        copy[idx] = r[idx];
    }
    return copy;
}

    function n(r, o) {

        for (var t = 0; t < o.length - 2; t += 3) {

            var a = o.charAt(t + 2);

            a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a),

            a = "+" === o.charAt(t + 1) ? r >>> a : r << a,

            r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a

        }

        return r

    }

/**
 * Compute the `sign` request parameter for a Baidu Translate query.
 *
 * Steps:
 *   1. If the text is longer than 30 symbols (a surrogate pair counts as one
 *      symbol), keep only the first 10, the middle 10 and the last 10.
 *   2. Encode the (possibly shortened) text as UTF-8 bytes.
 *   3. Fold the bytes into a running hash seeded with the integer part of the
 *      seed, using `n()` with the opcode string "+-a^+6" per byte and
 *      "+-3^+b+-f" once at the end.
 *   4. XOR with the fractional part of the seed, force the result to be
 *      non-negative, reduce modulo 1e6 and format as "<hash>.<hash ^ seedHigh>".
 *
 * @param {string} r - Text to be translated.
 * @param {string} [gtk="320305.131321201"] - Seed in "<int>.<int>" form. The
 *     original page script falls back to `window.gtk`; the default is the
 *     value that was observed while debugging. Omit it (or pass null) to get
 *     the previous hard-coded behaviour.
 * @returns {string} The computed sign.
 */
function e(r, gtk) {
    var seed = (gtk === undefined || gtk === null) ? "320305.131321201" : String(gtk);

    // --- 1. Shorten long input to first 10 + middle 10 + last 10 symbols. ---
    var pairs = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    var mid;
    if (pairs === null) {
        // No surrogate pairs: every UTF-16 unit is one symbol, so plain
        // string slicing is enough. `slice` replaces the deprecated `substr`.
        var length = r.length;
        if (length > 30) {
            mid = Math.floor(length / 2);
            r = r.slice(0, 10) + r.slice(mid - 5, mid + 5) + r.slice(-10);
        }
    } else {
        // Rebuild the text as a list of symbols so that a surrogate pair is
        // never cut in half: segments between pairs are split per character
        // and the matched pairs are re-inserted in order.
        var segments = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/);
        var lastSegment = segments.length - 1;
        var symbols = [];
        for (var k = 0; k < segments.length; k++) {
            if (segments[k] !== "") {
                symbols.push.apply(symbols, segments[k].split(""));
            }
            if (k !== lastSegment) {
                symbols.push(pairs[k]);
            }
        }
        var count = symbols.length;
        if (count > 30) {
            mid = Math.floor(count / 2);
            r = symbols.slice(0, 10).join("") +
                symbols.slice(mid - 5, mid + 5).join("") +
                symbols.slice(-10).join("");
        }
    }

    // --- 2. Parse the seed; missing or non-numeric parts count as 0. ---
    var seedParts = seed.split(".");
    var seedHigh = Number(seedParts[0]) || 0;
    var seedLow = Number(seedParts[1]) || 0;

    // --- 3. UTF-8 encode the text. ---
    var bytes = [];
    for (var idx = 0; idx < r.length; idx++) {
        var code = r.charCodeAt(idx);
        if (code < 128) {
            bytes.push(code);
        } else if (code < 2048) {
            bytes.push((code >> 6) | 192, (code & 63) | 128);
        } else if ((code & 64512) === 55296 &&
                   idx + 1 < r.length &&
                   (r.charCodeAt(idx + 1) & 64512) === 56320) {
            // High surrogate followed by a low surrogate: combine both units
            // into one code point (consuming the second unit) -> 4 bytes.
            code = 65536 + ((code & 1023) << 10) + (r.charCodeAt(++idx) & 1023);
            bytes.push(
                (code >> 18) | 240,
                ((code >> 12) & 63) | 128,
                ((code >> 6) & 63) | 128,
                (code & 63) | 128
            );
        } else {
            // Everything else in the BMP (including a lone surrogate) -> 3 bytes.
            bytes.push(
                (code >> 12) | 224,
                ((code >> 6) & 63) | 128,
                (code & 63) | 128
            );
        }
    }

    // --- 4. Hash the bytes. ---
    var hash = seedHigh;
    for (var b = 0; b < bytes.length; b++) {
        hash = n(hash + bytes[b], "+-a^+6");
    }
    hash = n(hash, "+-3^+b+-f");
    hash ^= seedLow;
    if (hash < 0) {
        // Reinterpret the signed 32-bit value as unsigned.
        hash = (hash & 2147483647) + 2147483648;
    }
    hash %= 1e6;
    return hash.toString() + "." + (hash ^ seedHigh);
}

python 代码

import execjs

import requests

# Text to translate; it is also the input from which the sign is derived.
query = '抖音'

# Read the extracted JavaScript and compile it so its functions can be called.
with open('baidu.js', 'r', encoding='utf-8') as f:
    js_source = f.read()
ctx = execjs.compile(js_source)

# `e` is the signing function defined in baidu.js.
sign = ctx.call('e', query)
print(sign)

获取到 sign 后，向翻译接口的 url 发起请求

url = 'https://fanyi.baidu.com/v2transapi?from=zh&to=en'

# NOTE(review): the Cookie below is a hard-coded session cookie (it includes
# BDUSS login fields). It presumably has to match the account/session the
# `token` further down was issued for -- confirm, and replace both with your
# own values. The original literal was broken across two lines, which is a
# syntax error; it is rebuilt here from adjacent string literals with the
# exact same content and no embedded newline.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
    "Cookie": (
        "FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; "
        "SOUND_PREFER_SWITCH=1; REALTIME_TRANS_SWITCH=1; APPGUIDE_10_0_2=1; "
        "PSTM=1642830257; BIDUPSID=0E2AC3B62017F8384FE230D137E46F00; "
        "__yjs_duid=1_379aa65c8f4ccd7e3e4ffa6f999984041642901294369; "
        "BDUSS=UZVZkRuOG5UMVFCNFhreHVGenJ3LW9kbzh1dTFnbGFvcFJ4NjQybmJGYjYwaHhpRVFBQUFBJCQAAAAAAAAAAAEAAAADX~Kgyq62~rjnuOc4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPpF9WH6RfVhe; "
        "BDUSS_BFESS=UZVZkRuOG5UMVFCNFhreHVGenJ3LW9kbzh1dTFnbGFvcFJ4NjQybmJGYjYwaHhpRVFBQUFBJCQAAAAAAAAAAAEAAAADX~Kgyq62~rjnuOc4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPpF9WH6RfVhe; "
        "BAIDUID=4E3AF32C0D796DD08243DC6E37CD01A7:FG=1; "
        "BDSFRCVID=Dl0OJeC627PLIo6D0nAvYtkDKEE17loTH6aorkmEKU9S4RWVoMJzEG0PKM8g0Kub4BcaogKKymOTHrAF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; "
        "H_BDCLCKID_SF=tbAeVC8-JKL3q-I4q47D5nL--fIX-PRjf2nQop7F5l8-hC3MXMrojx3Qh4QqXR3W3g34KCD5an7xOKQphn5OhfLU3HrgtpjUQDOuatcN3KJmOpC9bT3v5tj3LxbK2-biW55L2Mbd-qjP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe4bK-Tryea_8qx5; "
        "BDSFRCVID_BFESS=Dl0OJeC627PLIo6D0nAvYtkDKEE17loTH6aorkmEKU9S4RWVoMJzEG0PKM8g0Kub4BcaogKKymOTHrAF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; "
        "H_BDCLCKID_SF_BFESS=tbAeVC8-JKL3q-I4q47D5nL--fIX-PRjf2nQop7F5l8-hC3MXMrojx3Qh4QqXR3W3g34KCD5an7xOKQphn5OhfLU3HrgtpjUQDOuatcN3KJmOpC9bT3v5tj3LxbK2-biW55L2Mbd-qjP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe4bK-Tryea_8qx5; "
        "BAIDUID_BFESS=4E3AF32C0D796DD08243DC6E37CD01A7:FG=1; "
        "BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=5; "
        "Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1643252312,1643252321,1644383242,1644385296; "
        "BDRCVFR[EiXQVvOKA3D]=mk3SLVN4HKm; "
        "H_PS_PSSID=35106_31254_34584_35490_35246_35323_26350_35744; "
        "BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; "
        "BA_HECTOR=8185a5ag8120ah84mf1h06o3q0r; "
        "ab_sr=1.0.1_YjVhYzc2YjQyMjNjZDhlZjIyMWYzMmU5YTczOTdhZTlmMGJiZThiMWY3MGFiNmZjZjE0NDFiYTA3Mzg0MjI4Yzc1NjMwYjBkY2I2NWM3YmFiN2ZkNTk2YWU2OGJhYmJkZDdmZjQ2YTljYjJhMmQ2ZTI2NDRiYWM4OTkyMjEzNmQzODMxZThiNTI5ZDExOGYzYzUzNTI3NmE2YWUzZjQ3YjcwNjYzMWE4NGNhMDliOGJlY2Y5MjY0NmUxOWI4M2Jh; "
        "Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1644390814"
    ),
}

data = {
    "from": "zh",
    "to": "en",
    "query": query,  # the text we want to translate
    "transtype": "translang",
    "simple_means_flag": "3",
    "sign": sign,  # changes with the query; obtained by running the JS code
    "token": "d1f1ec159f0e0a2012baa175ff956fb9",
}

# Without a timeout requests can wait indefinitely on a stalled connection.
http_response = requests.post(url, headers=headers, data=data, timeout=10)
# Surface HTTP errors directly instead of failing later while parsing JSON.
http_response.raise_for_status()
response = http_response.json()
print(response)

最后的返回内容
上一篇 下一篇

猜你喜欢

热点阅读