# Copyright (c) 2019, RangerUFO
# This file is part of alpr_utils.
# alpr_utils is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# alpr_utils is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with alpr_utils. If not, see <https://www.gnu.org/licenses/>.
import time
import math
import argparse
import mxnet as mx
import matplotlib.pyplot as plt
from gluoncv import model_zoo, data
from dataset import load_image, visualize
from utils import color_normalize, plate_labels, reconstruct_plates, Vocabulary
from wpod_net import WpodNet
from ocr_net import OcrNet
def fixed_crop(raw, bbox):
    """Crop ``raw`` to ``bbox`` after clipping the box to the image bounds.

    Args:
        raw: image whose ``shape[0]`` is the height and ``shape[1]`` the width.
        bbox: indexable of four scalars-as-arrays (each exposing
            ``asscalar()``) in the order x0, y0, x1, y1.

    Returns:
        Whatever ``mx.image.fixed_crop`` returns for the clipped rectangle.
    """
    height, width = raw.shape[0], raw.shape[1]
    # x coordinates are limited by the width, y coordinates by the height.
    limits = (width, height, width, height)
    left, top, right, bottom = [
        min(max(int(bbox[k].asscalar()), 0), limit)
        for k, limit in enumerate(limits)
    ]
    # mx.image.fixed_crop takes the origin plus a width and a height.
    return mx.image.fixed_crop(raw, left, top, right - left, bottom - top)
def recognize_plate(vocab, ocr, plate, beam, beam_size, context):
    """Decode the characters of one plate image and print the result.

    The plate is normalised and encoded once with ``ocr.encode``; the
    characters are then produced one step at a time with ``ocr.decode``,
    starting from ``<GO>`` and stopping at ``<EOS>``.

    Args:
        vocab: vocabulary exposing ``char2idx`` and ``idx2char``.
        ocr: recognition network exposing ``encode`` and ``decode``.
        plate: plate image; presumably height x width x channel, since it is
            transposed with ``(2, 0, 1)`` before encoding -- TODO confirm.
        beam: truthy to decode with beam search, falsy for greedy decoding.
        beam_size: number of hypotheses kept per step (beam search only).
        context: MXNet context the input tensors are moved to.

    Returns:
        None. The decoded text and the elapsed time go to standard output.

    Note:
        Neither decoding mode has a length limit; both stop only once the
        network emits ``<EOS>``.
    """
    ts = time.time()
    x = color_normalize(plate).transpose((2, 0, 1)).expand_dims(0)
    enc_y, _ = ocr.encode(x.as_in_context(context))
    go_token = vocab.char2idx("<GO>")
    eos_token = vocab.char2idx("<EOS>")

    if beam:
        # Each hypothesis is (token list, accumulated log-probability).
        sequences = [([go_token], 0.0)]
        # Keep expanding until every surviving hypothesis has ended; this
        # also works for beam_size == 1, where counting candidates cannot
        # tell "finished" from "just expanded".
        while not all(seq[-1] == eos_token for seq, _ in sequences):
            candidates = []
            for seq, score in sequences:
                if seq[-1] == eos_token:
                    # Finished hypotheses compete unchanged; never decode
                    # past <EOS>.
                    candidates.append((seq, score))
                    continue
                tgt = mx.nd.array(seq, ctx=context).reshape((1, -1))
                tgt_len = mx.nd.array([len(seq)], ctx=context)
                y, _ = ocr.decode(tgt, tgt_len, enc_y)
                # Work in log space directly so that a vanishing probability
                # cannot reach math.log(0).
                log_probs = mx.nd.log_softmax(y, axis=2)
                top = log_probs[0, -1].topk(k=beam_size, ret_typ="both")
                for i in range(beam_size):
                    candidates.append((
                        seq + [int(top[1][i].asscalar())],
                        score + float(top[0][i].asscalar()),
                    ))
            sequences = sorted(candidates, key=lambda tup: tup[1], reverse=True)[:beam_size]
        scores = mx.nd.array([score for _, score in sequences], ctx=context)
        # Relative confidence of the surviving hypotheses.
        probs = mx.nd.softmax(scores)
        print("ocr profiling: %f" % (time.time() - ts))
        for i, (seq, score) in enumerate(sequences):
            # seq[1:-1] drops the leading <GO> and the trailing <EOS>.
            print("".join([vocab.idx2char(token) for token in seq[1:-1]]), score, probs[i].asscalar())
    else:
        sequence = [go_token]
        while True:
            tgt = mx.nd.array(sequence, ctx=context).reshape((1, -1))
            tgt_len = mx.nd.array([len(sequence)], ctx=context)
            y, _ = ocr.decode(tgt, tgt_len, enc_y)
            index = mx.nd.argmax(y, axis=2)
            # asscalar() yields a float; keep token ids integral like the
            # beam-search path does.
            char_token = int(index[0, -1].asscalar())
            sequence.append(char_token)
            if char_token == eos_token:
                break
            print(vocab.idx2char(char_token), end="", flush=True)
        # Terminate the line of characters streamed above.
        print("")
        print("ocr profiling: %f" % (time.time() - ts))
def detect_plate(wpod, vocab, ocr, raw, dims, threshold, plt_hw, beam, beam_size, context):
    """Locate plates in ``raw``, plot them and run recognition on each one.

    Args:
        wpod: detection network, called on the normalised, resized image.
        vocab: vocabulary forwarded to ``recognize_plate``.
        ocr: recognition network forwarded to ``recognize_plate``.
        raw: input image; ``shape[0]`` is its height, ``shape[1]`` its width.
        dims: forwarded to ``plate_labels``.
        threshold: forwarded to ``plate_labels`` and used to binarise the
            probability map that is plotted.
        plt_hw: ``(height, width)`` of the plate images to reconstruct.
        beam: truthy to decode with beam search.
        beam_size: beam width used when ``beam`` is truthy.
        context: MXNet context the network input is moved to.

    Returns:
        None. Timing and results are printed and a matplotlib figure is shown.
    """
    h = raw.shape[0]
    w = raw.shape[1]
    # Scale so the short side becomes 288 * aspect ratio, capped at 608.
    f = min(288 * max(h, w) / min(h, w), 608) / min(h, w)
    ts = time.time()
    # Round the *resized* size up to the next multiple of 16; the remainder
    # has to be taken after scaling, not from the input size.
    net_w = int(w * f)
    net_h = int(h * f)
    net_w += (-net_w) % 16
    net_h += (-net_h) % 16
    img = mx.image.imresize(raw, net_w, net_h)
    x = color_normalize(img).transpose((2, 0, 1)).expand_dims(0)
    y = wpod(x.as_in_context(context))
    # Channel 0 is treated as the detection probability map and channels 2
    # onward as the affine parameters handed to plate_labels.
    probs = y[0, :, :, 0]
    affines = y[0, :, :, 2:]
    # NOTE(review): the literal 16 is presumably the network stride -- confirm
    # against plate_labels.
    labels = plate_labels(img, probs, affines, dims, 16, threshold)
    # reconstruct_plates is given the size as (width, height).
    plates = reconstruct_plates(raw, [pts for pts, _ in labels], (plt_hw[1], plt_hw[0]))
    print("wpod profiling: %f" % (time.time() - ts))

    # Two columns: the first row holds the overview plots, plates follow.
    rows = math.ceil((len(plates) + 2) / 2)
    plt.subplot(rows, 2, 1)
    visualize(img, [(pts.reshape((-1)).asnumpy().tolist(), str(prob)) for pts, prob in labels])
    plt.subplot(rows, 2, 2)
    visualize(probs > threshold)
    for i, plate in enumerate(plates):
        plt.subplot(rows, 2, i + 3)
        # Draw the plate into the cell that was just selected.
        visualize(plate)
        print("plate[%d]:" % i)
        recognize_plate(vocab, ocr, plate, beam, beam_size, context)
    # Display the assembled figure.
    plt.show()
def test(images, dims, threshold, plt_hw, seq_len, no_yolo, beam, beam_size, context):
    """Run plate detection and recognition on every image path in ``images``.

    Unless ``no_yolo`` is set, vehicles are first extracted with a pretrained
    YOLOv3 model and plate detection runs on each vehicle crop; otherwise it
    runs on the whole image.

    Args:
        images: iterable of image paths.
        dims: forwarded to ``detect_plate``.
        threshold: forwarded to ``detect_plate``.
        plt_hw: ``(height, width)`` of plate images; also given to ``OcrNet``.
        seq_len: sequence length given to ``OcrNet``.
        no_yolo: truthy to skip vehicle extraction.
        beam: truthy to decode with beam search.
        beam_size: beam width used when ``beam`` is truthy.
        context: MXNet context for models and inputs.

    Returns:
        None.
    """
    print("Loading model...")
    yolo = None
    if not no_yolo:
        yolo = model_zoo.get_model('yolo3_darknet53_voc', pretrained=True, ctx=context)
    wpod = WpodNet()
    wpod.load_parameters("model/wpod_net.params", ctx=context)
    vocab = Vocabulary()
    ocr = OcrNet(plt_hw, vocab.size(), seq_len)
    ocr.load_parameters("model/ocr_net.params", ctx=context)

    for path in images:
        raw = load_image(path)
        if no_yolo:
            detect_plate(wpod, vocab, ocr, raw, dims, threshold, plt_hw, beam, beam_size, context)
            continue

        ts = time.time()
        x, _ = data.transforms.presets.yolo.transform_test(raw, short=512)
        classes, scores, bboxes = yolo(x.as_in_context(context))
        # Boxes refer to the transformed input; rescale them to the
        # coordinates of the raw image (x by width, y by height).
        bboxes[0, :, 0::2] = bboxes[0, :, 0::2] / x.shape[3] * raw.shape[1]
        bboxes[0, :, 1::2] = bboxes[0, :, 1::2] / x.shape[2] * raw.shape[0]
        # Keep confident detections of cars and buses only.
        vehicles = [
            fixed_crop(raw, bboxes[0, i]) for i in range(classes.shape[1])
            if scores[0, i].asscalar() > 0.5 and
            yolo.classes[int(classes[0, i].asscalar())] in ('car', 'bus')
        ]
        print("yolo profiling: %f" % (time.time() - ts))
        for i, vehicle in enumerate(vehicles):
            print("vehicle[%d]:" % i)
            detect_plate(wpod, vocab, ocr, vehicle, dims, threshold, plt_hw, beam, beam_size, context)
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the tester.
    parser = argparse.ArgumentParser(description="Start a ALPR tester.")
    parser.add_argument("images", metavar="IMG", help="path of the image file[s]", type=str, nargs="+")
    parser.add_argument("--dims", help="set the sample dimentions (default: 208)", type=int, default=208)
    parser.add_argument("--threshold", help="set the positive threshold (default: 0.9)", type=float, default=0.9)
    parser.add_argument("--plt_w", help="set the max width of output plate images (default: 144)", type=int, default=144)
    parser.add_argument("--plt_h", help="set the max height of output plate images (default: 48)", type=int, default=48)
    parser.add_argument("--seq_len", help="set the max length of output sequences (default: 8)", type=int, default=8)
    parser.add_argument("--no_yolo", help="do not extract vehicles using YOLOv3", action="store_true")
    parser.add_argument("--beam", help="using beam search", action="store_true")
    parser.add_argument("--beam_size", help="set the size of beam (default: 5)", type=int, default=5)
    parser.add_argument("--device_id", help="select device that the model using (default: 0)", type=int, default=0)
    parser.add_argument("--gpu", help="using gpu acceleration", action="store_true")
    args = parser.parse_args()

    # Use the selected GPU only when --gpu is given; otherwise stay on the CPU.
    context = mx.gpu(args.device_id) if args.gpu else mx.cpu(args.device_id)

    # The plate size is passed on as (height, width).
    test(args.images, args.dims, args.threshold, (args.plt_h, args.plt_w), args.seq_len, args.no_yolo, args.beam, args.beam_size, context)