【python+selenium】点触文字验证码的识别

2019-05-24  本文已影响0人  米兰的小铁匠
效果示例

预·思路

要做的并不多,封装类,方法各司其职!验证的关键部位(文字坐标)交给第三方平台;

以国家企业信用信息公示系统网站为例,点击查询就会弹出验证界面!

编写程序时遇到的一些问题,在最后说明。


正·代码

# Register a Chaojiying account, then generate a software ID on its web page
# and fill in the three values below.
CHAOJIYING_USERNAME = ''
CHAOJIYING_PASSWORD = ''
# Placeholder so the module parses; the article left this value blank.
# Replace it with the software ID generated for your account.
CHAOJIYING_SOFT_ID = 0
# Captcha-type codes. Per the original author's notes: 9004 covers 1~4 points
# (9201 was noted for 1~5) and 9008 covers 5~8 points.
# NOTE(review): confirm these codes against the service's current type list.
CHAOJIYING_KIND_FOUR = 9004
CHAOJIYING_KIND_FIVE = 9008
# Presumably the keyword typed into the search box; not used in the visible
# code, so verify against the caller.
DATA_TEXT = '小米科技'
class CrackTouClick():
    """Browser helper for a click-the-words captcha on the query page.

    The class wraps a Chrome WebDriver session, locates the page elements
    involved in the captcha, converts a recognition result into click
    coordinates and performs the clicks. Recognition itself is delegated to
    the ``Chaojiying`` client created in ``__init__``.
    """

    def __init__(self):
        self.url = 'http://www.gsxt.gov.cn/index.html'
        self.browser = webdriver.Chrome()
        # Explicit wait shared by all element lookups (20 second timeout).
        self.wait = WebDriverWait(self.browser, 20)
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def __del__(self):
        # __init__ may have failed before ``browser`` was assigned, so do not
        # assume the attribute exists.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() would only close
            # the current window and leave the driver running.
            browser.quit()

    def open(self):
        """
        Load the start page (``self.url``) in the browser.
        :return: None
        """
        self.browser.get(self.url)

    def text_input(self):
        """
        Locate the search box (element with id ``keyword``).

        This only finds the element; typing into it is left to the caller.
        :return: the located input element
        """
        keyword_box = self.wait.until(EC.presence_of_element_located((By.ID, 'keyword')))
        return keyword_box

    def get_touclick_button(self):
        """
        Locate the query button (id ``btn_query``) once it is clickable.
        :return: the button element
        """
        # Fixed pause before waiting for the button.
        # NOTE(review): presumably gives the page time to settle - confirm.
        time.sleep(3)
        button = self.wait.until(EC.element_to_be_clickable((By.ID, 'btn_query')))
        return button

    def get_touclick_element(self):
        """
        Locate the captcha image element.
        :return: tuple ``(element, src)`` - the image element and the value
                 of its ``src`` attribute
        """
        element = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_item_img')))
        url_http = element.get_attribute('src')
        return element, url_http

    def get_points(self, captcha_result):
        """
        Convert a recognition result into a list of click coordinates.

        ``pic_str`` holds ``x,y`` pairs separated by ``|``. Sample result
        from the original article:

            {'err_no': 0, 'err_str': 'OK', 'pic_id': '3068920282435800059',
             'pic_str': '203,298|67,264|242,60|238,260',
             'md5': '6330865e91232108ae828df101edff12'}

        which yields ``[[203, 298], [67, 264], [242, 60], [238, 260]]``.

        :param captcha_result: mapping containing a ``pic_str`` entry
        :return: list of ``[x, y]`` integer pairs; empty when ``pic_str`` is
                 missing or empty
        """
        pic_str = captcha_result.get('pic_str')
        if not pic_str:
            # Nothing recognised: report "no points" rather than crashing on
            # None.split(...) or int('').
            return []
        return [[int(number) for number in group.split(',')]
                for group in pic_str.split('|')]

    def touch_click_words(self, locations):
        """
        Click each coordinate on the captcha image.
        :param locations: iterable of ``[x, y]`` offsets, as returned by
                          ``get_points``
        :return: None
        """
        for location in locations:
            print(location)
            # The image element is looked up again before every click.
            element, _ = self.get_touclick_element()
            # NOTE(review): the origin used by move_to_element_with_offset
            # depends on the Selenium version (top-left corner in older
            # releases, element centre in newer ones) - confirm the installed
            # version matches the coordinates being passed.
            ActionChains(self.browser).move_to_element_with_offset(
                element, location[0], location[1]).click().perform()
            time.sleep(0.5)

    def touch_click_verify(self):
        """
        Click the captcha's confirm button (class ``geetest_commit_tip``).
        :return: None
        """
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_commit_tip')))
        button.click()

    def request_download(self, IMAGE_URL):
        """
        Download the captcha image and return it re-encoded as PNG bytes.

        The raw downloaded bytes are also written to ``img.png`` in the
        current working directory.

        :param IMAGE_URL: URL of the captcha image
        :return: PNG-encoded image bytes
        """
        # One request is enough: the same payload feeds both the decoded
        # image and the file written below.
        response = requests.get(IMAGE_URL)
        raw_bytes = response.content

        image = Image.open(BytesIO(raw_bytes))
        png_buffer = BytesIO()
        # The original author reported that 'JPG'/'jpg' did not work here.
        image.save(png_buffer, format='PNG')

        with open('img.png', 'wb') as f:
            f.write(raw_bytes)

        return png_buffer.getvalue()

说明·问题

第三方(超级鹰)

完整代码

G码云


送给你们:

Valar morghulis (凡人皆有一死) Valar dohaeris (凡人皆需侍奉)

上一篇 下一篇

猜你喜欢

热点阅读