爬虫验证码之--点触验证码

2018-07-03  本文已影响0人  strive鱼

该类验证码则以12306最为典型,TouClick(https://www.touclick.com/)提供了该类验证码,下面的例子就以该网站为准
该类验证码的破解还是基于selenium/webdriver

破解的思路是:将验证码图片提交给打码平台(崔庆才的书中推荐超级鹰,https://www.chaojiying.com/price.html),平台会返回识别结果在图片中的坐标位置,然后解析这些坐标并模拟点击,完成登录

第一步:先注册超级鹰账号并申请软件ID(https://www.chaojiying.com/user/reg/),再充值一些题分
第二步:下载对应的python API,链接为https://www.chaojiying.com/api-14.html,对其进行修改,修改后的代码如下:

import requests
from hashlib import md5


class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    The account credentials and software ID are sent along with every
    request as form fields (see ``base_params``).
    """

    def __init__(self, username, password, soft_id):
        """Store the credentials and pre-build the shared request fields.

        username: Chaojiying account name.
        password: plain-text account password; only its MD5 hex digest is
            stored on the instance and sent as the ``pass2`` field.
        soft_id: software ID registered with Chaojiying.
        """
        self.username = username
        # Fix: ``str`` has no ``encoding`` method; ``encode`` produces the
        # bytes that md5() requires.
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        # Form fields common to every API call.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image for recognition.

        im: raw image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html

        Returns the decoded JSON body of the response.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php', data=params, files=files, headers=self.headers)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha.

        im_id: picture ID of the captcha being reported.

        Returns the decoded JSON body of the response.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php', data=params, headers=self.headers)
        return r.json()

    # snake_case aliases: the Selenium script later in this article calls
    # ``post_pic`` on the client, so both spellings must work.
    post_pic = PostPic
    report_error = ReportError


# The Selenium script does ``from chaojiying import Chaojiying``; expose the
# class under that name as well without breaking the original one.
Chaojiying = Chaojiying_Client


if __name__ == '__main__':
    # Demo: send a local image to Chaojiying and print the JSON reply.
    chaojiying = Chaojiying_Client('超级鹰用户名', '超级鹰用户名的密码', '96001')
    # The context manager closes the file even if reading fails.
    with open('a.jpg', 'rb') as image_file:
        im = image_file.read()
    # Fix: the Python 2 ``print`` statement is a syntax error on Python 3;
    # the call form prints the same single value.
    print(chaojiying.PostPic(im, 1902))

在此之前需要稍微了解一下selenium 中ActionChains的使用方法

click(on_element=None) ——单击鼠标左键

click_and_hold(on_element=None) ——点击鼠标左键,不松开

context_click(on_element=None) ——点击鼠标右键

double_click(on_element=None) ——双击鼠标左键

drag_and_drop(source, target) ——拖拽到某个元素然后松开

drag_and_drop_by_offset(source, xoffset, yoffset) ——拖拽到某个坐标然后松开

key_down(value, element=None) ——按下某个键盘上的键

key_up(value, element=None) ——松开某个键

move_by_offset(xoffset, yoffset) ——鼠标从当前位置移动到某个坐标

move_to_element(to_element) ——鼠标移动到某个元素

move_to_element_with_offset(to_element, xoffset, yoffset) ——移动到距某个元素(左上角坐标)多少距离的位置

perform() ——执行链中的所有动作

release(on_element=None) ——在某个元素位置松开鼠标左键

send_keys(*keys_to_send) ——发送某个键到当前焦点的元素

send_keys_to_element(element, *keys_to_send) ——发送某个键到指定元素

下面为代码的实现和注释(声明:以后的爬虫代码均是对崔庆才爬虫书籍的自我总结,并非原创)

import time 
from io import BytesIO
from PIL import Image
from selenium import webdriver 
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By 
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from chaojiying import Chaojiying 

# Placeholder credentials for the TouClick admin login page.
# Fix: this constant was misspelled ``EEMAIL`` while the class reads ``EMAIL``,
# which raised NameError as soon as the class was instantiated.
EMAIL = 'xxxxx'
PASSWORD = 'xxxx'

# Placeholder Chaojiying account settings.
CHAOJIYING_USERNAME = 'xxxx'
CHAOJIYING_PASSWORD = 'xxxxx'
CHAOJIYING_SOFT_ID = 896781  # the software ID you applied for
CHAOJIYING_KIND = 9102  # Chaojiying type code of the click captcha to solve

class cracktouclick(object):
    """Log in to the TouClick admin page, solving its click captcha via Chaojiying.

    The flow (see ``crack``): open the login page, trigger the captcha,
    screenshot and crop it, send it to Chaojiying, click the returned
    coordinates, confirm, then submit the login form.
    """

    def __init__(self):
        """Start Chrome and set up the wait helper, credentials and captcha client."""
        self.url = 'http://admin.touclick.com/login.html'
        self.browser = webdriver.Chrome()
        # Every explicit wait below gives up after 20 seconds.
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def __del__(self):
        """Close the browser window when the object is destroyed."""
        # __del__ also runs when __init__ failed before ``browser`` was
        # assigned; guard so that case does not raise AttributeError.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            browser.close()

    def open(self):
        """Load the login page and type the e-mail and password into the form."""
        self.browser.get(self.url)
        # Wait for both input boxes to be present before typing.
        email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
        email.send_keys(self.email)
        password.send_keys(self.password)

    def get_touclick_button(self):
        """Return the button that opens the captcha, once it is clickable."""
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'touclick-hod-wrap')))
        return button

    def get_touclick_element(self):
        """Return the element holding the popped-up captcha picture."""
        element = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'touclick-pub-content')))
        return element

    def get_position(self):
        """Return the captcha element's page box as ``(top, bottom, left, right)``."""
        element = self.get_touclick_element()
        # Pause before measuring; presumably lets the captcha finish rendering.
        time.sleep(2)
        location = element.location
        size = element.size
        top, bottom, left, right = location['y'], location['y'] + size['height'], location['x'], location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Return a screenshot of the browser page as a PIL image."""
        screenshot = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screenshot))
        return screenshot

    @staticmethod
    def _to_crop_box(top, bottom, left, right):
        """Reorder ``get_position()`` values into PIL's (left, upper, right, lower) box."""
        return (left, top, right, bottom)

    def get_touclick_image(self, name='captcha.png'):
        """Crop the captcha out of a page screenshot, save it and return it.

        name: file name the cropped image is saved under.

        Returns the cropped PIL image.
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置:', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # Fix: the box used to be passed as (top, left, right, bottom), which
        # swaps the x and y of the upper-left corner and crops the wrong area.
        captcha = screenshot.crop(self._to_crop_box(top, bottom, left, right))
        captcha.save(name)
        return captcha

    # Next, the text positions recognised by Chaojiying (returned as a string)
    # are parsed and then clicked.
    # The format is 'pic_str': '132,127|56,77'
    def get_points(self, captcha_result):
        """Parse the recognition result into a list of ``[x, y]`` integer pairs.

        captcha_result: dict returned by the Chaojiying client; its
            ``pic_str`` value looks like ``'132,127|56,77'``.

        Returns ``[[132, 127], [56, 77]]`` for the example above, or an empty
        list when ``pic_str`` is missing or empty.
        """
        pic_str = captcha_result.get('pic_str')
        if not pic_str:
            # Without this guard a missing key raised AttributeError and an
            # empty string raised ValueError from int('').
            return []
        groups = pic_str.split('|')
        locations = [[int(number) for number in group.split(',')] for group in groups]
        return locations

    def touch_click_words(self, locations):
        """Click each recognised position inside the captcha picture.

        locations: list of ``[x, y]`` offsets as produced by ``get_points``.
        """
        # NOTE(review): the offsets are presumably relative to the picture's
        # top-left corner; newer Selenium 4 releases appear to measure
        # move_to_element_with_offset from the element's centre instead --
        # confirm against the installed Selenium version.
        for location in locations:
            print (location)
            ActionChains(self.browser).move_to_element_with_offset(self.get_touclick_element(), location[0], location[1]).click().perform()
            time.sleep(1)

    def touch_click_verify(self):
        """Click the captcha's confirm button once it becomes clickable."""
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'touclick-pub-submit')))
        button.click()

    def login(self):
        """Click the login form's submit button after the captcha has passed."""
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, '_submit')))
        submit.click()
        time.sleep(10)
        print('登录成功')

    def crack(self):
        """Run the whole flow, starting over when the captcha is not accepted.

        Raises RuntimeError when Chaojiying returns no click coordinates.
        """
        # Local import: selenium is already a dependency of this script, and
        # importing here keeps the fix inside this class.
        from selenium.common.exceptions import TimeoutException

        self.open()  # load the login page and fill in the form
        button = self.get_touclick_button()
        button.click()  # pops up the captcha picture
        image = self.get_touclick_image()
        # Serialise the cropped captcha to PNG bytes and send it to Chaojiying
        # together with the captcha type code.
        bytes_array = BytesIO()
        image.save(bytes_array, format='PNG')
        result = self.chaojiying.post_pic(bytes_array.getvalue(), CHAOJIYING_KIND)
        print(result)  # decoded JSON reply
        locations = self.get_points(result)
        if not locations:
            # Nothing to click: retrying blindly would loop forever, so stop
            # and show what the service replied.
            raise RuntimeError('Chaojiying returned no click coordinates: %r' % (result,))
        self.touch_click_words(locations)
        self.touch_click_verify()
        try:
            success = self.wait.until(EC.text_to_be_present_in_element((By.CLASS_NAME, 'touclick-hod-note'), '验证成功'))
        except TimeoutException:
            # Fix: wait.until() raises on timeout instead of returning a falsy
            # value, so the retry branch below could never run.
            success = False
        print(success)
        if not success:
            self.crack()
        else:
            self.login()
            
            
            
if __name__ == '__main__':
    # Script entry point: build the cracker (which launches the browser)
    # and run the full captcha-and-login flow once.
    cracker = cracktouclick()
    cracker.crack()
    ```
        
    
        
        
        
    
        
        
        
上一篇 下一篇

猜你喜欢

热点阅读