1 Star 0 Fork 0

woddle/IdentifyingCode

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
test.py 4.97 KB
一键复制 编辑 原始数据 按行查看 历史
woddle 提交于 2017-08-10 22:35 . 生产了部分子图
from PIL import Image
from PIL import ImageFilter
import urllib
import urllib.request
import requests
import re
import json
import time
import os,sys
import shutil
import cv2
import numpy as np
import uuid
import matplotlib.pyplot as plt
import pickle
# HACK: work around urllib's CERTIFICATE_VERIFY_FAILED error by disabling certificate verification
# https://github.com/mtschirs/quizduellapi/issues/2
import ssl
# Browser-like User-Agent string (not referenced by the functions visible in this file).
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"
# Endpoint that get_img() and save() download captcha images from.
pic_url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.21191171556711197"
# NOTE(review): this replaces the default HTTPS context factory process-wide, so
# urllib stops verifying server certificates for every request. Insecure; confirm
# it is acceptable before reusing this module.
ssl._create_default_https_context = ssl._create_unverified_context
# Root data directory. It already ends with a backslash and the sub-directory
# suffixes below start with one, so the joined paths contain a doubled separator.
data_dir = "E:\\data\\验证码\\12306验证码\\"
img_dir = data_dir + "\\imgs\\"
subimg_dir=data_dir+"\\subimgs\\"
textimg_dir=data_dir+"\\textimgs\\"
def get_img():
    """Download one captcha image and return it as a PIL image.

    The bytes fetched from ``pic_url`` are written to ``./tmp.jpg``
    (overwriting any previous file) and that file is then opened with
    ``Image.open``.

    Returns:
        The image object produced by ``Image.open("./tmp.jpg")``.
    """
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(pic_url) as resp:
        raw = resp.read()
    with open("./tmp.jpg", 'wb') as fp:
        fp.write(raw)
    return Image.open("./tmp.jpg")
def get_sub_img(im, x, y):
    """Crop one tile out of a captcha image.

    Tiles are laid out on a grid whose first tile starts at pixel (5, 41);
    consecutive tiles are 72 pixels apart and each crop box is 67 pixels
    wide and tall.

    Args:
        im: Image-like object exposing ``crop((left, top, right, bottom))``.
        x: Column index, 0 to 3 inclusive.
        y: Row index, 0 to 2 inclusive.

    Returns:
        Whatever ``im.crop`` returns for the selected tile box.

    Raises:
        ValueError: If ``x`` or ``y`` is outside the accepted range.
    """
    # Explicit checks instead of ``assert`` so validation survives ``python -O``.
    if not 0 <= x <= 3:
        raise ValueError("x must be between 0 and 3, got %r" % (x,))
    if not 0 <= y <= 2:
        raise ValueError("y must be between 0 and 2, got %r" % (y,))
    tile = 67  # edge length of the crop box in pixels
    gap = 5    # spacing between neighbouring tiles
    left = 5 + (tile + gap) * x
    top = 41 + (tile + gap) * y
    return im.crop((left, top, left + tile, top + tile))
def ocr_question_extract(im):
    """Run OCR on the strip of the captcha cropped at (127, 3, 260, 22).

    The strip is cleaned up with ``pre_ocr_processing`` and then passed to
    ``pytesseract.image_to_string`` with ``lang='chi_sim'``.

    Args:
        im: Image object exposing ``crop``.

    Returns:
        The recognised text with surrounding whitespace stripped, or ``None``
        when ``pytesseract`` cannot be imported.
    """
    # git@github.com:madmaze/pytesseract.git
    # Imported lazily so the rest of the module works without pytesseract;
    # ``global`` keeps the module visible at module level after the first call.
    global pytesseract
    try:
        import pytesseract
    except ImportError:
        # Only a missing package is reported here; other errors propagate.
        print("[ERROR] pytesseract not installed")
        return None
    strip = im.crop((127, 3, 260, 22))
    strip = pre_ocr_processing(strip)
    return pytesseract.image_to_string(strip, lang='chi_sim').strip()
def pre_ocr_processing(im):
    """Return a binarized greyscale version of ``im`` prepared for OCR.

    A blurred, max-filtered (size 23) copy of the RGB image is built first.
    Each output pixel is the largest per-channel value of
    ``255 + original - filtered``, capped at 255. The result is copied and
    thresholded at 150 with ``binarize``.

    Args:
        im: Image object exposing ``convert``, ``filter``, ``load`` and ``size``.

    Returns:
        A new mode-'L' image containing only the values 0 and 255.
    """
    rgb = im.convert("RGB")
    n_cols, n_rows = rgb.size
    filtered = rgb.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(23))
    result = rgb.convert('L')
    src = rgb.load()
    ref = filtered.load()
    dst = result.load()
    for row in range(n_rows):
        for col in range(n_cols):
            src_px = src[col, row]
            ref_px = ref[col, row]
            # Largest channel-wise difference, shifted up by 255.
            strongest = max(255 + src_px[ch] - ref_px[ch] for ch in range(3))
            dst[col, row] = min(255, strongest)
    out = result.copy()
    binarize(out, 150)
    return out
def binarize(im, thresh=120):
    """Threshold a greyscale image in place.

    Every pixel strictly below ``thresh`` becomes 0; all others become 255.

    Args:
        im: Mode-'L' image exposing ``size``, ``getpixel`` and ``putpixel``.
        thresh: Cut-off value; must satisfy ``0 < thresh < 255``.

    Returns:
        None. ``im`` is modified directly.
    """
    assert 0 < thresh < 255
    assert im.mode == 'L'
    n_cols, n_rows = im.size
    for row in range(n_rows):
        for col in range(n_cols):
            spot = (col, row)
            im.putpixel(spot, 0 if im.getpixel(spot) < thresh else 255)
def save(count=10000, out_dir="d:\\tmp\\"):
    """Download captcha images from ``pic_url`` into numbered JPEG files.

    Files are written as ``<out_dir>/<i>.jpg`` for ``i`` in ``range(count)``;
    existing files with the same name are overwritten.

    Args:
        count: Number of images to download (defaults to the original 10000).
        out_dir: Destination directory (defaults to the original ``d:\\tmp\\``).
    """
    for i in range(count):
        # Close each response before the next request so sockets do not pile up.
        with urllib.request.urlopen(pic_url) as resp:
            raw = resp.read()
        with open(os.path.join(out_dir, str(i) + ".jpg"), 'wb') as fp:
            fp.write(raw)
def Re(path="d:\\tmp\\"):
    """Delete small files in ``path`` and renumber the rest as ``0.jpg``, ``1.jpg``, ...

    Entries smaller than 10 KiB are removed. The remaining entries are
    renamed to consecutive numbers in the order ``os.listdir`` reports them.

    Renaming happens in two passes through unique temporary names. Renaming
    directly to ``<i>.jpg`` can hit a name that still belongs to an entry not
    yet processed, which raises on Windows and silently overwrites the other
    file on POSIX. If the function is interrupted, entries may be left with a
    ``.renaming`` suffix.

    Args:
        path: Directory to clean up (defaults to the original ``d:\\tmp\\``).
    """
    staged = []
    for entry in os.listdir(path):
        current = os.path.join(path, entry)
        if os.path.getsize(current) / 1024 < 10:
            os.remove(current)
            continue
        # Pass 1: move every kept file out of the way of the final names.
        parked = os.path.join(path, uuid.uuid4().hex + ".renaming")
        os.rename(current, parked)
        staged.append(parked)
    # Pass 2: no final name can be occupied any more.
    for i, parked in enumerate(staged):
        os.rename(parked, os.path.join(path, str(i) + ".jpg"))
def move():
    """Rename the files of three source folders and copy them to ``d:\\tmp\\``.

    The folders ``d:\\imgs``, ``d:\\imgs1`` and ``d:\\imgs2`` are processed in
    that order. Each file is renamed in place to ``<n>_<k>.jpg``, where ``n``
    is a counter shared across all three folders and ``k`` is 0, 1 or 2 for
    the folder it came from. The renamed file is then copied to ``d:\\tmp\\``.
    """
    counter = 0
    # (directory to list, prefix used to build paths, file-name suffix)
    sources = (
        ("d:\\imgs", "d:\\imgs\\", "_0.jpg"),
        ("d:\\imgs1", "d:\\imgs1\\", "_1.jpg"),
        ("d:\\imgs2", "d:\\imgs2\\", "_2.jpg"),
    )
    for listing_dir, path, suffix in sources:
        for strfile in os.listdir(listing_dir):
            name = str(counter) + suffix
            os.rename(path + strfile, path + name)
            shutil.copyfile(path + name, "d:\\tmp\\" + name)
            counter += 1
def get_text_image(img):
    """Return the region of ``img`` bounded by (120, 0) and (180, 29).

    Args:
        img: Image-like object exposing ``crop((left, top, right, bottom))``.

    Returns:
        Whatever ``img.crop`` returns for that box.
    """
    text_box = (120, 0, 180, 29)
    return img.crop(text_box)
def getmodify():
    """Re-encode every readable image in ``./img`` in place with OpenCV.

    Each file is read with ``cv2.imread`` and written back to the same path
    with ``cv2.imwrite``. Files OpenCV cannot decode are left untouched.
    """
    for file in os.listdir("./img"):
        target = "./img/" + file
        img = cv2.imread(target)
        if img is None:
            # imread reports an unreadable or non-image file by returning None;
            # handing that to imwrite would raise and abort the whole run.
            continue
        cv2.imwrite(target, img)
def getalltextimage(srcdir, disdir):
    """Crop the text region out of every image in ``srcdir`` into ``disdir``.

    Each file is opened with PIL, cropped with ``get_text_image`` and saved
    under the same file name in ``disdir``. Files that cannot be processed are
    reported on stdout and skipped.

    Args:
        srcdir: Directory containing the source images.
        disdir: Existing directory that receives the cropped images.
    """
    for file in os.listdir(srcdir):
        # os.path.join also works when the directory has no trailing separator.
        st = os.path.join(srcdir, file)
        try:
            # Context manager releases the file handle after every image.
            with Image.open(st) as img:
                get_text_image(img).save(os.path.join(disdir, file))
        except Exception as e:
            # Best effort: one bad file must not stop the batch.
            print("Exception:", e)
def getallsubimage(srcdir, disdir):
    """Split every image in ``srcdir`` into tiles and save them in ``disdir``.

    For each source image, ``get_sub_img`` is called for rows 0-1 and columns
    0-3 (eight tiles). Every tile is saved as ``<uuid1>.jpg`` in ``disdir``.
    Files that cannot be processed are reported on stdout and skipped.

    Args:
        srcdir: Directory containing the source images.
        disdir: Existing directory that receives the tiles.
    """
    for file in os.listdir(srcdir):
        # os.path.join also works when the directory has no trailing separator.
        st = os.path.join(srcdir, file)
        try:
            # Context manager releases the file handle after every image.
            with Image.open(st) as img:
                for row in range(2):
                    for col in range(4):
                        sub = get_sub_img(img, col, row)
                        sub.save(os.path.join(disdir, str(uuid.uuid1()) + ".jpg"))
        except Exception as e:
            # Best effort: one bad file must not stop the batch.
            print("Exception:", e)
def gettypecount():
    """Count the distinct name prefixes of the entries in ``./tags``.

    The prefix of an entry is the part of its name before the first
    underscore (the whole name when it contains none).

    Returns:
        The number of different prefixes.
    """
    prefixes = {entry.split("_")[0] for entry in os.listdir("./tags")}
    return len(prefixes)
# Add the type number to each file name.
def tags():
    """Insert each file's type number into its name inside ``./tags``.

    ``type.pk`` is a pickled mapping from name prefix to type number. A file
    named ``<prefix>_<rest>`` is renamed to ``<prefix>_<number>_<rest>``.
    Only the first underscore separates prefix from rest, so names with
    further underscores keep their full remainder and extension. Names
    without an underscore are left untouched.

    Raises:
        KeyError: If a file's prefix is missing from the mapping.
    """
    # NOTE(security): pickle.load can execute arbitrary code; type.pk must
    # come from a trusted source.
    with open("type.pk", "rb") as mapping_file:
        d = pickle.load(mapping_file)
    for file in os.listdir("./tags"):
        prefix, sep, rest = file.partition("_")
        if not sep:
            # No "<prefix>_<rest>" structure, so there is nothing to tag.
            continue
        renamed = prefix + "_" + str(d[prefix]) + "_" + rest
        os.rename("./tags/" + file, "./tags/" + renamed)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/woddle/IdentifyingCode.git
git@gitee.com:woddle/IdentifyingCode.git
woddle
IdentifyingCode
IdentifyingCode
master

搜索帮助