1 Star 0 Fork 25

薛定谔的猫/懒人原神AI

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
lanrenonnx.py 9.66 KB
一键复制 编辑 原始数据 按行查看 历史
老大哥 提交于 2024-07-22 21:42 . 1.78开源
import os
import random
import time
import onnxruntime as ort
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
def put_chinese_text_on_image(img, label, position, color=(0, 255, 0),
                              font_path='Arial.Unicode.ttf', font_size=15,
                              font_thickness=5):
    """Draw a text label (e.g. Chinese) onto an OpenCV image using Pillow.

    The image is converted BGR -> RGB, drawn on with Pillow, and converted
    back RGB -> BGR, so the returned array is a new image; ``img`` itself is
    not modified by this function.

    Args:
        img: OpenCV image array (converted with ``cv2.COLOR_BGR2RGB``).
        label: The string to draw.
        position: ``(x, y)`` anchor. The text is shifted up by its measured
            height so that it sits above ``y``, clamped at the top edge.
        color: Fill colour as a 3-item sequence. It is applied directly to
            the RGB Pillow image, so it is interpreted as RGB (callers
            holding a BGR colour must reverse it first).
        font_path: Font file path. A relative path is resolved against the
            directory containing this module; an absolute path is used as is.
        font_size: Font size passed to ``ImageFont.truetype``.
        font_thickness: Accepted for backward compatibility and not used.
            NOTE(review): the original forwarded it as ``width=``, which is
            not a documented ``ImageDraw.text`` keyword (the outline keyword
            is ``stroke_width``) -- confirm whether bold text is wanted.

    Returns:
        A new BGR image with the label drawn on it.
    """
    # Pillow works on RGB data, OpenCV on BGR.
    pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_img)

    # Resolve the font next to this module with a portable separator
    # (the previous hard-coded "\\" only worked on Windows).
    module_dir = os.path.dirname(os.path.abspath(__file__))
    font_file = os.path.join(module_dir, font_path)
    pil_font = ImageFont.truetype(font_file, font_size)

    # Bottom edge of the text box measured from the origin ~ text height.
    text_height = draw.textbbox((0, 0), label, font=pil_font)[3]

    # Lift the text above the anchor without leaving the top of the image.
    x, y = position
    anchor = (x, max(y - text_height, 0))

    draw.text(anchor, label, fill=tuple(color), font=pil_font)

    # Back to OpenCV's BGR layout.
    return cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)
class LanRenOnnxYolov():
    """YOLO detector running an ONNX model through onnxruntime.

    The model output is read as rows of
    ``[cx, cy, w, h, confidence, class scores...]`` expressed in model-input
    pixels (see ``post_process_opencv``).
    """

    def __init__(self, weights='yolov5l_320.onnx', model_h=320, model_w=320, thred_nms=0.4, thred_cond=0.4,
                 providers=("CUDAExecutionProvider", 'CPUExecutionProvider'), dic_labels=None):
        """Load the model and run a short warm-up / speed measurement.

        Args:
            weights: Path of the ONNX model file.
            model_h: Model input height in pixels.
            model_w: Model input width in pixels.
            thred_nms: NMS overlap threshold passed to ``cv2.dnn.NMSBoxes``.
            thred_cond: Score threshold passed to ``cv2.dnn.NMSBoxes``.
            providers: onnxruntime execution providers to request, e.g.
                ``['TensorrtExecutionProvider', 'CUDAExecutionProvider',
                'CPUExecutionProvider']``.
            dic_labels: Mapping of class id to label text, e.g.
                ``{0: 'person', 1: 'bicycle'}``. Defaults to an empty dict.

        If loading or warming up with the requested providers fails, the
        session is recreated with ``CPUExecutionProvider`` only.
        """
        self.weights = weights
        self.model_h = model_h
        self.model_w = model_w
        self.thred_nms = thred_nms
        self.thred_cond = thred_cond
        # Fresh containers per instance instead of shared mutable defaults.
        self.dic_labels = {} if dic_labels is None else dic_labels
        # One random (bright) colour per known label.
        self.colors = [[random.randint(140, 255) for _ in range(3)] for _ in range(len(self.dic_labels))]
        self.openfile_name_model = weights  # model file name

        requested = list(providers) if providers is not None else None
        try:
            self._load_and_warm_up(requested)
        except Exception:
            # Only real errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            print("CUDA加速失败,使用CPU推理")
            self._load_and_warm_up(['CPUExecutionProvider'])

    def _load_and_warm_up(self, providers):
        """Create the inference session, report providers and print a frame rate."""
        self.net = ort.InferenceSession(self.openfile_name_model, providers=providers)
        active = self.net.get_providers()
        print("执行提供者:", active)
        print("GPU加速:", any(tag in str(active) for tag in ("CUDA", "Tensorrt")))

        dummy = np.zeros((1080, 1920, 3), dtype=np.uint8)
        # Untimed first run, then a timed second run.
        self.detect(dummy)
        started = time.time()
        self.detect(dummy)
        elapsed_ms = int((time.time() - started) * 1000)
        # Clamp to 1 ms: a sub-millisecond run used to raise ZeroDivisionError.
        print("检测帧率:", 1000 // max(elapsed_ms, 1))

    def _label_for(self, cls_id):
        """Return the label text for ``cls_id``, or the id itself as text if unknown."""
        try:
            return self.dic_labels[cls_id]
        except (KeyError, IndexError):
            return str(cls_id)

    # Draw one detection
    def plot_one_box(self, x, img, color=None, label=None, line_thickness=None):
        """Draw one bounding box (and optional label) on ``img``.

        Adapted from the YOLOv5 project.

        Args:
            x: Box as ``[x1, y1, x2, y2]``.
            img: OpenCV image; the rectangle is drawn onto it in place.
            color: Rectangle colour, e.g. ``(0, 255, 0)``; random if omitted.
            label: Optional text drawn above the top-left corner.
            line_thickness: Line thickness; derived from the image size if
                omitted.

        Returns:
            The annotated image. When a label is drawn this is a new array
            returned by ``put_chinese_text_on_image``.
        """
        tl = (
            line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
        )  # line/font thickness
        color = color or [random.randint(0, 255) for _ in range(3)]
        top_left = (int(x[0]), int(x[1]))
        bottom_right = (int(x[2]), int(x[3]))
        cv2.rectangle(img, top_left, bottom_right, color, thickness=tl, lineType=cv2.LINE_AA)
        if label:
            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            # Pillow-based drawing supports non-ASCII labels. The colour is
            # channel-reversed because the text is drawn on an RGB image.
            img = put_chinese_text_on_image(
                img, label, (top_left[0], top_left[1] - 2),
                [color[2], color[1], color[0]], font_size=t_size[1],
            )
        return img

    # Non-maximum suppression
    def post_process_opencv(self, outputs, img_h, img_w):
        """Rescale raw predictions to image pixels and apply NMS.

        Args:
            outputs: 2-D array whose columns are read as
                ``cx, cy, w, h, confidence, class scores...`` in model-input
                pixels.
            img_h: Height of the original image.
            img_w: Width of the original image.

        Returns:
            ``(boxes, confidences, class_ids)`` for the kept detections, with
            boxes as ``[x1, y1, x2, y2]`` rows; three empty lists if nothing
            is kept.
        """
        conf = outputs[:, 4].tolist()
        c_x = outputs[:, 0] / self.model_w * img_w
        c_y = outputs[:, 1] / self.model_h * img_h
        w = outputs[:, 2] / self.model_w * img_w
        h = outputs[:, 3] / self.model_h * img_h
        cls_id = np.argmax(outputs[:, 5:], axis=1)

        x1 = c_x - w / 2
        y1 = c_y - h / 2
        xyxy = np.stack((x1, y1, c_x + w / 2, c_y + h / 2), axis=-1)
        # cv2.dnn.NMSBoxes expects rectangles as [x, y, width, height];
        # feeding it corner pairs makes the overlap computation wrong.
        xywh = np.stack((x1, y1, w, h), axis=-1)

        keep = cv2.dnn.NMSBoxes(xywh.tolist(), conf, self.thred_cond, self.thred_nms)
        # Some OpenCV builds return an Nx1 array, others a flat one or an
        # empty tuple; normalise to a flat index vector.
        keep = np.asarray(keep, dtype=np.int64).reshape(-1)
        if keep.size == 0:
            return [], [], []
        return xyxy.astype(np.float64)[keep], np.asarray(conf)[keep], cls_id[keep]

    # Inference
    def detect(self, img_np, save_path=None, plot_box=True):
        """Run detection on one image.

        Args:
            img_np: OpenCV image array (a copy is taken; the input is not
                modified).
            save_path: If given, the annotated image is written there with
                ``cv2.imwrite``; boxes are then drawn regardless of
                ``plot_box``.
            plot_box: Whether to draw boxes and labels on the returned image.

        Returns:
            ``(res_loc, image)`` where ``res_loc`` is a list of dicts with
            keys ``label``, ``x``/``y`` (box centre), ``sim`` (score rounded
            to two decimals) and ``points`` (``((x1, y1), (x2, y2))``), and
            ``image`` is the (possibly annotated) image; a 4-channel image is
            converted with ``cv2.COLOR_BGRA2BGR``.
        """
        img0 = img_np.copy()
        # Pre-processing: resize, BGR -> RGB, scale to [0, 1], HWC -> NCHW.
        img = cv2.resize(img0, (self.model_w, self.model_h), interpolation=cv2.INTER_AREA)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
        # Inference; drop the batch axis of the first output.
        outs = self.net.run(None, {self.net.get_inputs()[0].name: blob})[0].squeeze(axis=0)
        # Box computation in original-image coordinates.
        img_h, img_w = img0.shape[:2]
        boxes, confs, ids = self.post_process_opencv(outs, img_h, img_w)

        draw = plot_box or save_path is not None
        res_loc = []
        for box, score, cls_id in zip(boxes, confs, ids):
            cls_id = int(cls_id)
            name = self._label_for(cls_id)
            label = '%s:%.2f' % (name, score)
            # int32 avoids the wrap-around int16 had on large coordinates;
            # plain ints keep numpy scalars out of the result.
            x1, y1, x2, y2 = (int(v) for v in box.astype(np.int32))
            if draw:
                color = self.colors[cls_id] if 0 <= cls_id < len(self.colors) else None
                img0 = self.plot_one_box([x1, y1, x2, y2], img0, label=label, line_thickness=None, color=color)
            res_loc.append({
                "label": name,
                "x": x1 + (x2 - x1) // 2,
                "y": y1 + (y2 - y1) // 2,
                "sim": float(f"{score:.2f}"),
                "points": ((x1, y1), (x2, y2)),
            })
        if save_path is not None:
            cv2.imwrite(save_path, img0)
        if img0.shape[2] == 4:
            return res_loc, cv2.cvtColor(img0, cv2.COLOR_BGRA2BGR)
        return res_loc, img0
if __name__ == '__main__':
    # Demo: load one image, build the detector, then time five detections.
    image_path = "D:\\pythonProjects\\yuanshenAI\\datasets\\yuanshen\\images\\17141983915923.jpg"
    weights = "./datas/yolov5l_320.onnx"
    model_h = 320
    model_w = 320
    # Class id -> label text (ids follow list order, starting at 0).
    dic_labels = dict(enumerate([
        "玩家", "采集物", "生命树", "开关", "怪物", "提示关闭", "使用原粹树脂", "使用浓缩树脂",
        "关闭", "继续挑战", "退出秘境", "副本门框", "副本打完了", "捡东西", "副本楼梯",
        "往下落", "矿石", "往上跳", "交互对话", "采集物-风", "采集物-冰", "确定",
        "取消", "返回", "被控了", "在水面", "宠物",
    ]))
    # np.fromfile + imdecode is used to read the image from image_path.
    big_img = cv2.imdecode(np.fromfile(file=image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    yolov = LanRenOnnxYolov(weights=weights, model_w=model_w, model_h=model_h, dic_labels=dic_labels)
    # First pass also writes the annotated image; the remaining four only time inference.
    for out_path in ("test.jpg", None, None, None, None):
        old_time = time.time()
        res_loc, img = yolov.detect(big_img, save_path=out_path, plot_box=True)
        print((time.time() - old_time) * 1000, res_loc)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/DkHome/lanren-genshin-impact-ai.git
git@gitee.com:DkHome/lanren-genshin-impact-ai.git
DkHome
lanren-genshin-impact-ai
懒人原神AI
master

搜索帮助