The repository was fetched successfully.
This action will force a synchronization from 梁新斌/Scrpay, which will overwrite any changes you have made since you forked the repository. Overwritten changes cannot be recovered!
The synchronization runs in the background, and the page will refresh when it has finished. Please be patient.
import tesserocr
from PIL import Image
import os
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
from io import BytesIO
from selenium.webdriver import ActionChains
'''
网站验证码识别
'''
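# Captcha type 1: conventional captcha recognition — reads the captcha on the CNKI (中国知网) user-registration page; such captchas usually consist of four digits or characters and are fairly easy to recognize.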
def v_code_1(num):
    """Recognize a conventional text captcha with Tesseract OCR.

    author: liangxinbin
    date: 2019-01-16

    Reads ``image/code_2.jpg`` (a captcha taken from the CNKI user
    registration page, http://www.cnki.net/) and runs OCR on it. Captchas of
    this kind usually consist of four digits or characters.

    :param num: ``1`` runs OCR on the image as-is, which works for clear
        captchas. Any other value first converts the image to greyscale and
        binarises it with a fixed threshold, displays the processed image,
        and then runs OCR on the result.
    :return: the text recognized in the image (it is also printed).
    """
    path = os.path.join('image', 'code_2.jpg')
    # The context manager closes the underlying file once OCR has finished.
    with Image.open(path) as source:
        image = source
        if num != 1:
            # 'L' converts the picture to greyscale.
            image = image.convert('L')
            # Binarise with an explicit threshold: grey levels below it map
            # to 0, all others to 1.
            threshold = 127
            table = [0 if level < threshold else 1 for level in range(256)]
            image = image.point(table, '1')
            # Display the pre-processed picture.
            image.show()
        result = tesserocr.image_to_text(image)
    print(result)
    return result
# Captcha type 2:
# slide-to-fit puzzle captchas.
# Example site: https://auth.geetest.com/login/
# Login e-mail that CrackGeetest stores and types into the login form.
Email = '578038469@qq.com'
# Login password typed into the form (presumably a masked placeholder — replace before running).
Pwd = '****'
# Number of pixels subtracted from the detected gap position before the drag track is built.
BORDER = 6
# Presumably the x offset at which the gap scan begins (get_gap() starts at column 60) — TODO confirm.
INIT_LEFT = 60
class CrackGeetest:
    """Solve the Geetest slide-puzzle captcha on the Geetest login page.

    The flow implemented by :meth:`crack` is: open the login page and fill in
    the credentials, click the captcha button, screenshot the captcha before
    and after the gap is shown, locate the gap by comparing the two pictures,
    drag the slider along an accelerate-then-decelerate track, and finally
    submit the login form.
    """

    def __init__(self):
        """Initialise the login data and start a Chrome session."""
        self.url = 'https://auth.geetest.com/login/'
        self.browers = webdriver.Chrome()
        # Every explicit wait in this class times out after 30 seconds.
        self.wait = WebDriverWait(self.browers, 30)
        self.email = Email
        self.passwd = Pwd

    def open(self):
        """Open the login page and type in the e-mail address and password.

        :return: None
        """
        self.browers.get(self.url)
        # Locate the e-mail and password input boxes.
        input_email = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#base > div.content-outter > div > div > div:nth-child(3) > div > form > div:nth-child(1) > div > div > div > input')))
        input_pwd = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#base > div.content-outter > div > div > div:nth-child(3) > div > form > div:nth-child(2) > div > div:nth-child(1) > div > input')))
        input_email.clear()
        input_pwd.clear()
        input_email.send_keys(self.email)
        input_pwd.send_keys(self.passwd)

    # Step 1: drive Chrome with selenium to click the captcha like a person would.
    def get_geetest_button(self):
        """Wait until the initial captcha button is clickable and return it.

        :return: the button element
        """
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))
        return button

    # Step 2: find the gap by capturing two pictures; where they differ is the gap.
    def get_position(self):
        """Return the on-page position of the captcha picture.

        :return: tuple ``(top, bottom, left, right)`` built from the
            element's location and size
        """
        img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img')))
        # Pause before reading the geometry (presumably to let the captcha finish rendering).
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Take a screenshot of the browser page.

        :return: the screenshot as a PIL image
        """
        screenshot = self.browers.get_screenshot_as_png()
        return Image.open(BytesIO(screenshot))

    def get_slider(self):
        """Wait until the slider handle is clickable and return it.

        :return: the slider element
        """
        slider = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_slider_button')))
        return slider

    def get_geetest_image(self, name='captcha.png'):
        """Crop the captcha out of a page screenshot and save it to disk.

        :param name: file name the cropped picture is saved under
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # crop() takes its box as (left, upper, right, lower).
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    @staticmethod
    def _channels_close(pixel1, pixel2, threshold=60):
        """Return True when the first three channels each differ by less than ``threshold``."""
        return all(abs(pixel1[channel] - pixel2[channel]) < threshold for channel in range(3))

    def get_gap(self, image1, image2):
        """Return the x offset of the gap.

        Columns are scanned left to right starting at ``INIT_LEFT``; the x
        coordinate of the first pixel that differs between the two pictures
        is returned.

        :param image1: picture without the gap
        :param image2: picture with the gap
        :return: x coordinate of the first differing pixel, or ``INIT_LEFT``
            when no pixel differs
        """
        # Load the pixel data once instead of once per compared pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        width, height = image1.size
        for x in range(INIT_LEFT, width):
            for y in range(height):
                if not self._channels_close(pixels1[x, y], pixels2[x, y]):
                    return x
        return INIT_LEFT

    def is_pixel_equal(self, image1, image2, x, y):
        """Tell whether two pictures have (nearly) the same pixel at ``(x, y)``.

        :param image1: first picture
        :param image2: second picture
        :param x: x coordinate
        :param y: y coordinate
        :return: True when each of the first three channels differs by less
            than 60, False otherwise
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        return self._channels_close(pixel1, pixel2)

    def get_track(self, distance):
        """Build the list of per-step x moves used to drag the slider.

        The motion accelerates (a = 2) over the first 4/5 of the distance
        and decelerates (a = -3) afterwards, sampled every 0.2 time units.
        Steps are the differences of the rounded cumulative position and the
        position is clamped to ``distance``, so the integer steps add up to
        ``round(distance)`` instead of drifting through per-step rounding or
        overshooting on the final step.

        :param distance: total offset to travel
        :return: list of integer moves; empty when ``distance`` is not positive
        """
        track = []
        # Exact (unrounded) position reached so far.
        current = 0
        # Sum of the integer steps already emitted.
        emitted = 0
        # Position at which deceleration starts.
        mid = distance * 4 / 5
        # Sampling interval.
        t = 0.2
        # Current velocity.
        v = 0
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a*t
            v = v0 + a * t
            # x = v0*t + 1/2 * a * t^2, never going past the target.
            current = min(current + v0 * t + 1 / 2 * a * t * t, distance)
            step = round(current) - emitted
            emitted += step
            track.append(step)
        return track

    def move_to_gap(self, slider, track):
        """Drag the slider along ``track`` and release it.

        :param slider: slider element
        :param track: list of x offsets, one per move
        :return: None
        """
        ActionChains(self.browers).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browers).move_by_offset(xoffset=x, yoffset=0).perform()
        # Hold briefly at the end of the drag before letting go.
        time.sleep(0.5)
        ActionChains(self.browers).release().perform()

    def login(self):
        """Click the login button and wait for the page to react.

        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))
        submit.click()
        time.sleep(10)
        print('登录成功')

    def crack(self):
        """Run the whole flow: fill the form, solve the captcha, log in.

        When the success message does not appear before the wait times out,
        the whole flow is started again.

        :return: None
        """
        # Imported here so the retry logic does not depend on a file-level import.
        from selenium.common.exceptions import TimeoutException

        # Enter user name and password.
        self.open()
        # Click the captcha button.
        button = self.get_geetest_button()
        button.click()
        # Capture the captcha picture.
        image1 = self.get_geetest_image('captcha1.png')
        # Click the slider so the gap shows up.
        slider = self.get_slider()
        slider.click()
        # Capture the captcha picture that contains the gap.
        image2 = self.get_geetest_image('captcha2.png')
        # Locate the gap.
        gap = self.get_gap(image1, image2)
        print('缺口位置', gap)
        # Compensate for the border offset.
        gap -= BORDER
        # Build the movement track.
        track = self.get_track(gap)
        print('滑动轨迹', track)
        # Drag the slider.
        self.move_to_gap(slider, track)
        try:
            success = self.wait.until(
                EC.text_to_be_present_in_element((By.CLASS_NAME, 'geetest_success_radar_tip_content'), '验证成功'))
        except TimeoutException:
            # until() raises on timeout instead of returning a falsy value;
            # translate that into the "failed" state so the retry can run.
            success = False
        print(success)
        # Retry after a failure.
        if not success:
            self.crack()
        else:
            self.login()
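# Captcha type 3: click-the-images captchas similar to the one used by 12306.
# This type is handled with the help of the third-party captcha-solving platform Chaojiying (超级鹰); see the reference code:
# https://github.com/Python3WebSpider/CrackTouClick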
if __name__ == '__main__':
    # To try the plain OCR path instead, call v_code_1(0) here.
    crack = CrackGeetest()
    try:
        crack.crack()
    finally:
        # Always shut the Chrome session down, even when cracking fails,
        # so no browser/driver process is left behind.
        crack.browers.quit()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容不涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违反国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。