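# [Hosting-site banner captured with the file, not part of the program:
#  "Code pull complete; the page will refresh automatically."]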
from PIL import Image
from PIL import ImageFilter
import urllib
import urllib.request
import requests
import re
import json
import time
import os,sys
import shutil
import cv2
import numpy as np
import uuid
import matplotlib.pyplot as plt
import pickle
# hack CERTIFICATE_VERIFY_FAILED
# https://github.com/mtschirs/quizduellapi/issues/2
import ssl
# Browser-like User-Agent string. None of the functions visible in this part
# of the file reference it.
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"
# URL fetched by get_img() and save(); the response body is stored as a .jpg.
pic_url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.21191171556711197"
# Replace the default HTTPS context factory with the unverified one, so HTTPS
# requests that use the default context skip certificate verification from
# this line on.
ssl._create_default_https_context = ssl._create_unverified_context
# Root data directory (Windows path, already ends with a backslash).
data_dir = "E:\\data\\验证码\\12306验证码\\"
# Sub-directories of data_dir. Because data_dir ends with a backslash and each
# suffix starts with one, the resulting paths contain a doubled separator.
img_dir = data_dir + "\\imgs\\"
subimg_dir=data_dir+"\\subimgs\\"
textimg_dir=data_dir+"\\textimgs\\"
def get_img():
    """Download one captcha image and return it as a PIL image.

    The bytes fetched from ``pic_url`` are written to ``./tmp.jpg``
    (overwriting any previous file) and that file is then opened with
    ``Image.open``.

    Returns:
        The image object returned by ``Image.open("./tmp.jpg")``.
    """
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being left to the garbage collector.
    with urllib.request.urlopen(pic_url) as resp:
        raw = resp.read()
    with open("./tmp.jpg", 'wb') as fp:
        fp.write(raw)
    return Image.open("./tmp.jpg")
def get_sub_img(im, x, y):
    """Crop one tile out of a captcha image.

    The first tile starts at pixel (5, 41). Every tile box is 67 pixels wide
    and high, and consecutive tiles start 72 pixels apart in both directions.

    Args:
        im: Object providing ``crop((left, top, right, bottom))``.
        x: Column index, 0 through 3.
        y: Row index, 0 through 2.

    Returns:
        Whatever ``im.crop`` returns for the tile's bounding box.

    Raises:
        AssertionError: If ``x`` or ``y`` is outside the accepted range.
    """
    assert 0 <= x <= 3
    assert 0 <= y <= 2
    tile_size = 67
    pitch = tile_size + 5  # distance between the origins of adjacent tiles
    left = 5 + pitch * x
    top = 41 + pitch * y
    return im.crop((left, top, left + tile_size, top + tile_size))
def ocr_question_extract(im):
    """Run OCR on the question strip of a captcha image.

    The region (127, 3, 260, 22) is cropped from ``im``, passed through
    ``pre_ocr_processing`` and handed to pytesseract with the ``chi_sim``
    language.

    Args:
        im: Image object providing ``crop``.

    Returns:
        The recognised text with surrounding whitespace stripped, or ``None``
        when pytesseract cannot be imported (an error line is printed).
    """
    # git@github.com:madmaze/pytesseract.git
    # The import is lazy and bound as a module-level global, so the module
    # still loads when pytesseract is missing.
    global pytesseract
    try:
        import pytesseract
    except ImportError:
        # Only a failed import means "not installed"; a bare except would
        # also hide KeyboardInterrupt and unrelated errors.
        print("[ERROR] pytesseract not installed")
        return None
    im = im.crop((127, 3, 260, 22))
    im = pre_ocr_processing(im)
    return pytesseract.image_to_string(im, lang='chi_sim').strip()
def pre_ocr_processing(im):
    """Return a black-and-white copy of ``im`` prepared for OCR.

    The image is converted to RGB and compared, pixel by pixel, with a copy
    that has been blurred and then passed through a size-23 max filter. For
    each pixel the largest per-channel value of ``255 + original - filtered``
    (capped at 255) becomes the grey level, and the result is binarized with
    a threshold of 150.

    Args:
        im: PIL image to process; it is not modified.

    Returns:
        A new mode ``'L'`` image containing only the values 0 and 255.
    """
    rgb = im.convert("RGB")
    n_cols, n_rows = rgb.size
    # NOTE(review): the blurred + max-filtered copy presumably serves as a
    # local background estimate -- confirm with the original author.
    filtered = rgb.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(23))
    grey = rgb.convert('L')
    src_px = rgb.load()
    flt_px = filtered.load()
    out_px = grey.load()
    for row in range(n_rows):
        for col in range(n_cols):
            src = src_px[col, row]
            flt = flt_px[col, row]
            level = max(255 + src[ch] - flt[ch] for ch in range(3))
            out_px[col, row] = level if level < 255 else 255
    result = grey.copy()
    binarize(result, 150)
    return result
def binarize(im, thresh=120):
    """Threshold a greyscale image in place.

    Every pixel whose value is below ``thresh`` becomes 0; all others
    become 255.

    Args:
        im: Image with ``mode == 'L'`` providing ``size``, ``getpixel`` and
            ``putpixel``. It is modified in place.
        thresh: Cut-off value, strictly between 0 and 255.

    Raises:
        AssertionError: If ``thresh`` is out of range or the mode is not 'L'.
    """
    assert 0 < thresh < 255
    assert im.mode == 'L'
    width, height = im.size
    for row in range(height):
        for col in range(width):
            pos = (col, row)
            im.putpixel(pos, 0 if im.getpixel(pos) < thresh else 255)
def save():
    r"""Download 10000 captcha images into the ``d:\tmp`` directory.

    Each response from ``pic_url`` is written to ``<i>.jpg`` where ``i`` runs
    from 0 to 9999. Existing files with the same names are overwritten.
    """
    for i in range(10000):
        # Close every response explicitly; otherwise up to 10000 connections
        # are left for the garbage collector to clean up.
        with urllib.request.urlopen(pic_url) as resp:
            raw = resp.read()
        with open("d:\\tmp\\" + str(i) + ".jpg", 'wb') as fp:
            fp.write(raw)
def Re(path="d:\\tmp\\"):
    """Delete small files in a directory and renumber the rest.

    Regular files smaller than 10 KiB are removed. The remaining files are
    renamed to ``0.jpg``, ``1.jpg``, ... in the order ``os.listdir`` returned
    them. Entries that are not regular files are left untouched.

    Args:
        path: Directory to clean up. Defaults to the directory ``save()``
            writes to.
    """
    survivors = []
    for entry in os.listdir(path):
        full = os.path.join(path, entry)
        if not os.path.isfile(full):
            continue
        if os.path.getsize(full) / 1024 < 10:
            os.remove(full)
        else:
            survivors.append(full)

    # Rename in two phases. Going straight to "<i>.jpg" can hit a file of that
    # name that has not been processed yet: os.rename then overwrites it on
    # POSIX and raises FileExistsError on Windows.
    parked_files = []
    for full in survivors:
        parked = os.path.join(path, uuid.uuid4().hex + ".renaming")
        os.rename(full, parked)
        parked_files.append(parked)
    for index, parked in enumerate(parked_files):
        os.rename(parked, os.path.join(path, str(index) + ".jpg"))
def move():
    r"""Rename the images of three folders and copy them into ``d:\tmp``.

    The folders ``d:\imgs``, ``d:\imgs1`` and ``d:\imgs2`` are processed in
    that order. Every file is renamed in place to ``<n>_<k>.jpg``, where
    ``n`` is a counter shared by all three folders and ``k`` is 0, 1 or 2 for
    the respective folder, and is then copied under the same name.
    """
    counter = 0
    for label, folder in enumerate(("d:\\imgs", "d:\\imgs1", "d:\\imgs2")):
        prefix = folder + "\\"
        for old_name in os.listdir(folder):
            new_name = str(counter) + "_" + str(label) + ".jpg"
            os.rename(prefix + old_name, prefix + new_name)
            shutil.copyfile(prefix + new_name, "d:\\tmp\\" + new_name)
            counter += 1
def get_text_image(img):
    """Crop the fixed box (120, 0, 180, 29) out of ``img``.

    Args:
        img: Object providing ``crop((left, top, right, bottom))``.

    Returns:
        Whatever ``img.crop`` returns for that box.
    """
    left, top, right, bottom = 120, 0, 180, 29
    text_box = (left, top, right, bottom)
    return img.crop(text_box)
def getmodify():
    """Re-encode every image in ``./img`` by reading and rewriting it with OpenCV.

    Files that OpenCV cannot decode are reported and left untouched.
    """
    for name in os.listdir("./img"):
        target = "./img/" + name
        img = cv2.imread(target)
        if img is None:
            # cv2.imread signals failure by returning None; passing that to
            # cv2.imwrite would raise and abort the whole loop.
            print("Exception: cannot read image", target)
            continue
        cv2.imwrite(target, img)
def getalltextimage(srcdir,disdir):
    """Crop the box (120, 0, 180, 29) from every image in a directory.

    Each file in ``srcdir`` is opened, cropped and saved under the same file
    name in ``disdir``. A file that cannot be processed is reported with
    ``print`` and skipped.

    Args:
        srcdir: Source directory. File names are appended directly, so it
            must end with a path separator.
        disdir: Destination directory, with the same trailing-separator
            requirement.
    """
    for file in os.listdir(srcdir):
        st = srcdir + file
        try:
            # The context manager releases the underlying file handle; crop
            # and save run inside it so the pixel data is still readable.
            with Image.open(st) as img:
                cropped = img.crop((120, 0, 180, 29))
                cropped.save(disdir + file)
        except Exception as e:
            print("Exception:",e)
            continue
def getallsubimage(srcdir,disdir):
    """Split every captcha image in a directory into its tiles.

    For each file in ``srcdir`` the tiles of rows 0-1 and columns 0-3 are
    extracted with ``get_sub_img`` and saved to ``disdir`` under a fresh
    ``uuid.uuid1()`` name with a ``.jpg`` extension. If any step fails for a
    file, the error is printed and the remaining tiles of that file are
    skipped.

    Args:
        srcdir: Source directory. File names are appended directly, so it
            must end with a path separator.
        disdir: Destination directory, with the same trailing-separator
            requirement.
    """
    for file in os.listdir(srcdir):
        st = srcdir + file
        try:
            # Close the source file once all tiles have been written.
            with Image.open(st) as img:
                for row in range(0, 2):
                    for col in range(0, 4):
                        # get_sub_img takes (image, column, row).
                        sub = get_sub_img(img, col, row)
                        sub.save(disdir + str(uuid.uuid1()) + ".jpg")
        except Exception as e:
            print("Exception:",e)
            continue
def gettypecount():
    """Count the distinct file-name prefixes in ``./tags``.

    The prefix of a file name is the text before its first underscore (the
    whole name when it contains none).

    Returns:
        The number of distinct prefixes as an ``int``.
    """
    prefixes = set()
    for entry in os.listdir("./tags"):
        prefixes.add(entry.split("_")[0])
    return len(prefixes)
# Add a type number to each file name.
def tags():
    """Insert the type number of each file in ``./tags`` into its name.

    ``type.pk`` is unpickled into a mapping from name prefix to type value.
    Every file ``<prefix>_<rest>`` is renamed to ``<prefix>_<value>_<rest>``.

    Raises:
        IndexError: If a file name contains no underscore.
        KeyError: If a prefix is missing from the mapping.
    """
    # NOTE: pickle.load can execute arbitrary code; type.pk must come from a
    # trusted source.
    with open("type.pk", "rb") as fp:
        type_ids = pickle.load(fp)
    for file in os.listdir("./tags"):
        # Split only at the first underscore so the rest of the name
        # (further underscores, the extension) survives the rename.
        parts = file.split("_", 1)
        renamed = parts[0] + "_" + str(type_ids[parts[0]]) + "_" + parts[1]
        os.rename("./tags/" + file, "./tags/" + renamed)
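# [Hosting-site notice captured with the file, not part of the program:
#  "This section may contain content that is unsuitable for display, so the
#  page does not show it. You can review and change it with the editing
#  functions. If you confirm that the content involves no inappropriate
#  language, pure advertising, violence, vulgar or pornographic material,
#  infringement, piracy, false or worthless content, and nothing that
#  violates national laws and regulations, you can click submit to appeal
#  and we will handle it as soon as possible."]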