python爬虫

selenium破解网易易盾滑块

2020-07-24  本文已影响0人  苦海飘摇

前言

之前由于工作原因做过极验验证的滑块验证码。该网站的滑块验证码可以直接提取出完整图片和缺口图片,利用 pillow 模块的 ImageChops.difference 差值算法解决起来比较简单。后来发现了网易易盾这种滑块验证码,它只能提取到两个图片的链接(缺口图片和待滑动的小图片)。本文介绍的就是网易易盾滑块的破解方法。

获取滑块验证码的图片

def get_img(self, target, template, xp):
    """Download both captcha images, save them to disk and return the page scale.

    Args:
        target: File name the background image (the one containing the gap)
            is saved under.
        template: File name the draggable jigsaw piece is saved under.
        xp: Width in pixels of the captcha image (per the original
            author's note); used as the numerator of the scale factor.

    Returns:
        ``xp`` divided by the width of the downloaded background image.

    Raises:
        requests.RequestException: If a download fails, times out or comes
            back with an HTTP error status.
    """
    # URL of the background image that contains the gap.
    target_link = self.wait.until(
        EC.presence_of_element_located((By.CLASS_NAME, 'yidun_bg-img'))
    ).get_attribute('src')
    # URL of the jigsaw piece that has to be dragged.
    template_link = self.wait.until(
        EC.presence_of_element_located((By.CLASS_NAME, 'yidun_jigsaw'))
    ).get_attribute('src')

    # A timeout keeps a stalled download from blocking forever, and the status
    # check stops an HTTP error page from being handed to PIL as image data.
    target_response = requests.get(target_link, timeout=10)
    target_response.raise_for_status()
    template_response = requests.get(template_link, timeout=10)
    template_response.raise_for_status()

    target_img = Image.open(BytesIO(target_response.content))
    template_img = Image.open(BytesIO(template_response.content))
    target_img.save(target)
    template_img.save(template)

    # size is (width, height); the ratio converts image pixels to xp-based pixels.
    return xp / target_img.size[0]

去除待滑动的图片的黑边

def change_size(self, file):
    """Crop an image file to the bounding box of its non-dark pixels.

    The image is median-blurred and binarised with a threshold of 15; pixels
    whose binarised grayscale value is 255 count as content, everything else
    is treated as the black border to remove.

    Args:
        file: Path of the image to read.

    Returns:
        The BGR sub-array of the original (unblurred) image covering every
        content pixel, including the last content row and column.

    Raises:
        ValueError: If the image contains no content pixel at all.
    """
    image = cv2.imread(file, 1)  # flag 1: load as a 3-channel colour image
    blurred = cv2.medianBlur(image, 5)  # suppress noise inside the black border
    _, binary = cv2.threshold(blurred, 15, 255, cv2.THRESH_BINARY)  # still 3 channels
    mask = cv2.cvtColor(binary, cv2.COLOR_BGR2GRAY)

    # One vectorised lookup replaces the per-pixel Python loop.
    rows, cols = np.where(mask == 255)
    if rows.size == 0:
        raise ValueError('no non-dark pixels found in %r' % (file,))

    top, bottom = rows.min(), rows.max()
    left, right = cols.min(), cols.max()
    # max() is an inclusive index, so add 1 to keep the last row and column
    # (the earlier `min:max` slice silently dropped them).
    return image[top:bottom + 1, left:right + 1]

算出缺口的位置

def match(self, target, template):
    """Locate the jigsaw piece inside the background image.

    Args:
        target: Path of the background image (read as grayscale).
        template: Path of the jigsaw-piece image; it is cropped with
            ``self.change_size`` before matching.

    Returns:
        The x coordinate (column), in image pixels, of the top-left corner
        of the best normalised-correlation match.
    """
    img_gray = cv2.imread(target, 0)  # flag 0: load as grayscale
    piece = cv2.cvtColor(self.change_size(template), cv2.COLOR_BGR2GRAY)
    res = cv2.matchTemplate(img_gray, piece, cv2.TM_CCOEFF_NORMED)

    # The best match is simply the maximum of the response map. Taking it
    # directly replaces the threshold bisection, which could end with zero
    # hits (IndexError) or several hits (arbitrary pick).
    _, _, _, max_loc = cv2.minMaxLoc(res)
    return max_loc[0]  # max_loc is (x, y)

模拟变加速

def get_tracks(self, distance, seconds, ease_func):
    """Build the list of per-step x offsets for dragging the slider.

    The drag deliberately overshoots the requested distance by 20 pixels
    following ``ease_func`` and then backs off by 20 pixels in small steps.

    Args:
        distance: Distance to travel.
        seconds: Duration used to sample the easing curve every 0.1.
        ease_func: Callable mapping progress ``t / seconds`` to a fraction
            of the (overshot) distance.

    Returns:
        A list of relative offsets, starting with 0 and ending with the
        fixed back-off sequence.
    """
    overshoot_target = distance + 20
    steps = [0]
    previous = 0
    for elapsed in np.arange(0.0, seconds, 0.1):
        position = round(ease_func(elapsed / seconds) * overshoot_target)
        steps.append(position - previous)
        previous = position
    # Back-off sequence; its entries sum to -20, cancelling the overshoot.
    steps += [-3, -2, -3, -2, -2, -2, -2, -1, 0, -1, -1, -1]
    return steps

def ease_out_quart(self, x):
    """Return the quartic ease-out value ``1 - (1 - x)^4`` for progress ``x``."""
    remaining = 1 - x
    return 1 - remaining ** 4

完整代码

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Descr : 
# @Atten : 
# @Author: luqin
# @File  : wyyd.py
# @Vers  : 1.0
# @Time  : 2020-07-23 18:13
import os
import cv2
import time
import random
import requests
import numpy as np
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from common.selenium_spider import SeleniumSpider
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class CrackSlider():
    """Automate the NetEase Yidun jigsaw-slider demo page with Selenium.

    Typical flow: ``get_img`` downloads the two captcha images, ``match``
    finds the gap position, ``get_tracks`` turns the distance into a list of
    drag steps and ``move_to_gap`` performs the drag.
    """

    def __init__(self):
        """Start the browser and open the demo page."""
        desired_capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        desired_capabilities['chromeOptions'] = {
            'args': [
                '--window-size=1400,850',
                # '--proxy-server=http://127.0.0.1:8888'  # enable a proxy to inspect traffic
            ],
            'extensions': []
        }
        # NOTE(review): self.path is computed but not passed to the driver,
        # which receives a hard-coded path below.
        self.path = os.path.join(os.path.dirname(__file__), "common/chromedriver")
        self.driver = SeleniumSpider(
            desired_capabilities=desired_capabilities,
            path='/usr/local/bin/chromedriver')  # location of the chromedriver binary
        self.url = 'http://dun.163.com/trial/jigsaw'  # demo page
        self.wait = WebDriverWait(self.driver, 20)
        self.driver.get(self.url)

    def get_img(self, target, template, xp):
        """Download both captcha images, save them and return the page scale.

        Args:
            target: File name the background image (containing the gap) is
                saved under.
            template: File name the draggable jigsaw piece is saved under.
            xp: Width in pixels of the captcha image (per the original
                author's note); used as the numerator of the scale factor.

        Returns:
            ``xp`` divided by the width of the downloaded background image.

        Raises:
            requests.RequestException: If a download fails, times out or
                comes back with an HTTP error status.
        """
        target_link = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'yidun_bg-img'))
        ).get_attribute('src')
        template_link = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'yidun_jigsaw'))
        ).get_attribute('src')

        # A timeout keeps a stalled download from blocking forever, and the
        # status check stops an HTTP error page from reaching PIL.
        target_response = requests.get(target_link, timeout=10)
        target_response.raise_for_status()
        template_response = requests.get(template_link, timeout=10)
        template_response.raise_for_status()

        target_img = Image.open(BytesIO(target_response.content))
        template_img = Image.open(BytesIO(template_response.content))
        target_img.save(target)
        template_img.save(template)

        # size is (width, height).
        return xp / target_img.size[0]

    def change_size(self, file):
        """Crop an image file to the bounding box of its non-dark pixels.

        The image is median-blurred and binarised with a threshold of 15;
        pixels whose binarised grayscale value is 255 count as content.

        Args:
            file: Path of the image to read.

        Returns:
            The BGR sub-array of the original (unblurred) image covering
            every content pixel, including the last content row and column.

        Raises:
            ValueError: If the image contains no content pixel at all.
        """
        image = cv2.imread(file, 1)  # flag 1: load as a 3-channel colour image
        blurred = cv2.medianBlur(image, 5)  # suppress noise inside the black border
        _, binary = cv2.threshold(blurred, 15, 255, cv2.THRESH_BINARY)  # still 3 channels
        mask = cv2.cvtColor(binary, cv2.COLOR_BGR2GRAY)

        # One vectorised lookup replaces the per-pixel Python loop.
        rows, cols = np.where(mask == 255)
        if rows.size == 0:
            raise ValueError('no non-dark pixels found in %r' % (file,))

        top, bottom = rows.min(), rows.max()
        left, right = cols.min(), cols.max()
        # max() is an inclusive index, so add 1 to keep the last row/column.
        return image[top:bottom + 1, left:right + 1]

    def match(self, target, template):
        """Locate the jigsaw piece inside the background image.

        Args:
            target: Path of the background image (read as grayscale).
            template: Path of the jigsaw-piece image; it is cropped with
                ``change_size`` before matching.

        Returns:
            The x coordinate (column), in image pixels, of the top-left
            corner of the best normalised-correlation match.
        """
        img_gray = cv2.imread(target, 0)  # flag 0: load as grayscale
        piece = cv2.cvtColor(self.change_size(template), cv2.COLOR_BGR2GRAY)
        res = cv2.matchTemplate(img_gray, piece, cv2.TM_CCOEFF_NORMED)

        # The best match is the maximum of the response map. Taking it
        # directly replaces the threshold bisection, which could end with
        # zero hits (IndexError) or several hits (arbitrary pick).
        _, _, _, max_loc = cv2.minMaxLoc(res)
        return max_loc[0]  # max_loc is (x, y)

    def move_to_gap(self, tracks):
        """Press the slider, drag it by each offset in ``tracks`` and release.

        Args:
            tracks: Iterable of horizontal offsets in pixels, applied in
                order with a 0.05 s pause after each move. The argument is
                only read; the caller's list is left intact.
        """
        slider = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'yidun_slider')))
        ActionChains(self.driver).click_and_hold(slider).perform()
        for x in tracks:
            ActionChains(self.driver).move_by_offset(xoffset=x, yoffset=0).perform()
            time.sleep(0.05)
        time.sleep(0.05)
        ActionChains(self.driver).release().perform()

    def get_tracks(self, distance, seconds, ease_func):
        """Build the list of per-step x offsets for dragging the slider.

        The drag overshoots the requested distance by 20 pixels following
        ``ease_func`` and then backs off by 20 pixels in small steps.

        Args:
            distance: Distance to travel.
            seconds: Duration used to sample the easing curve every 0.1.
            ease_func: Callable mapping progress ``t / seconds`` to a
                fraction of the (overshot) distance.

        Returns:
            A list of relative offsets, starting with 0 and ending with the
            fixed back-off sequence.
        """
        overshoot_target = distance + 20
        steps = [0]
        previous = 0
        for elapsed in np.arange(0.0, seconds, 0.1):
            position = round(ease_func(elapsed / seconds) * overshoot_target)
            steps.append(position - previous)
            previous = position
        # Back-off sequence; its entries sum to -20, cancelling the overshoot.
        steps += [-3, -2, -3, -2, -2, -2, -2, -1, 0, -1, -1, -1]
        return steps

    def ease_out_quart(self, x):
        """Return the quartic ease-out value ``1 - (1 - x)^4`` for progress ``x``."""
        return 1 - (1 - x) ** 4


if __name__ == '__main__':
    xp = 320  # width of the captcha in pixels (per the original author's note)
    target = 'target.jpg'  # temporary file for the background image
    template = 'template.png'  # temporary file for the jigsaw piece

    cs = CrackSlider()
    try:
        zoom = cs.get_img(target, template, xp)
        distance = cs.match(target, template)
        # NOTE(review): the +7 looks like an empirically tuned correction;
        # the source does not explain it.
        track = cs.get_tracks((distance + 7) * zoom, random.randint(2, 4), cs.ease_out_quart)
        cs.move_to_gap(track)
        time.sleep(2)
    finally:
        # Always shut the browser down, even when a step above raises.
        # quit() ends the whole driver session, whereas close() only closes
        # the current window. Assumes SeleniumSpider exposes the standard
        # WebDriver quit() - TODO confirm.
        cs.driver.quit()

上一篇 下一篇

猜你喜欢

热点阅读