master

分支 (1)

标签 (41)

管理

管理

master

V1.83

V1.82

V1.81

V1.79

V1.78

V1.77

V1.76

V1.75

V1.73

V1.72

V1.7

V1.699

V1.698

V1.697

V1.696

V1.694

V1.693

V1.692

V1.691

V1.683

lanren-genshin-impact-ai
/
lanrenonnx.py

import os
import random
import time
import onnxruntime as ort
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
def put_chinese_text_on_image(img, label, position, color=(0, 255, 0), font_path='Arial.Unicode.ttf', font_size=15, font_thickness=5):
    """
    Draw a string (including CJK characters) on an OpenCV image via PIL.

    OpenCV's own text rendering cannot draw Chinese glyphs, so the image is
    converted to a PIL image, the text is rendered with a TrueType font, and
    the result is converted back.

    Parameters:
    img -- OpenCV image (BGR channel order). It is not modified; a new array
           is returned.
    label -- the string to draw.
    position -- (x, y) tuple. The text is placed so that its bottom edge sits
                at y (clamped so it never starts above the top of the image).
    color -- 3-item colour sequence. It is used as the PIL fill on the
             RGB-converted image, so it is interpreted as (R, G, B).
    font_path -- font file name/path, resolved relative to the directory that
                 contains this source file (an absolute path is used as-is).
    font_size -- font size passed to PIL.
    font_thickness -- forwarded to ImageDraw.text as `width` (see note below).

    Returns:
    A new BGR image with the text drawn on it.
    """

    # PIL works on RGB data, so convert from OpenCV's BGR layout first.
    pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_img)

    # Resolve the font next to this module with os.path.join so the lookup is
    # portable and does not collapse to the drive root when dirname() is empty.
    font_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), font_path)
    pil_font = ImageFont.truetype(font_file, font_size)

    # Only the rendered text height is needed (bottom edge of the bbox).
    text_height = draw.textbbox((0, 0), label, font=pil_font)[3]

    # Shift the text up by its own height, but never above the image top.
    x, y = position
    y_adjusted = max(y - text_height, 0)

    # NOTE(review): `width` is not a documented ImageDraw.text parameter and is
    # presumably ignored by Pillow; `stroke_width` is the documented option.
    # Kept unchanged to preserve current rendering - confirm before switching.
    draw.text((x, y_adjusted), label, fill=tuple(color), font=pil_font, width=font_thickness)

    # Convert the PIL image back to an OpenCV BGR array.
    img_with_text = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)

    return img_with_text
class LanRenOnnxYolov():
    """YOLO object detector that runs an ONNX model through ONNX Runtime.

    The constructor creates the inference session, runs two warm-up
    detections on a black 1080x1920 frame and prints a rough frame rate.
    """

    def __init__(self, weights='yolov5l_320.onnx', model_h=320, model_w=320, thred_nms=0.4, thred_cond=0.4,
                 providers=("CUDAExecutionProvider", 'CPUExecutionProvider'), dic_labels=None):
        """
        Create the ONNX Runtime session and warm it up.

        weights: path of the .onnx model file.
        model_h, model_w: input height/width the image is resized to.
        thred_nms: IoU threshold handed to cv2.dnn.NMSBoxes.
        thred_cond: confidence threshold for keeping a prediction.
        providers: execution providers, e.g.
            ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
        dic_labels: class id -> name mapping, e.g. {0: 'person', 1: 'bicycle'};
            None means an empty mapping.

        If creating the session or the warm-up fails with the requested
        providers, a second attempt is made with the CPU provider only; an
        error during that second attempt propagates to the caller.
        """
        self.weights = weights
        self.model_h = model_h
        self.model_w = model_w
        self.thred_nms = thred_nms
        self.thred_cond = thred_cond
        # Label mapping; avoid a shared mutable default between instances.
        self.dic_labels = {} if dic_labels is None else dic_labels
        # One random (bright) colour per known label.
        self.colors = [[random.randint(140, 255) for _ in range(3)] for _ in range(len(self.dic_labels))]
        self.openfile_name_model = weights  # model file name

        # Original author's note: on a Raspberry Pi the providers argument
        # does not need to be specified.
        requested = providers if providers is None else list(providers)
        try:
            self._start_session(requested)
        except Exception as err:
            print("CUDA加速失败,使用CPU推理")
            print("错误信息:", repr(err))
            self._start_session(['CPUExecutionProvider'])

    def _start_session(self, providers):
        """Create self.net on *providers*, report the active providers, then warm up and print FPS."""
        self.net = ort.InferenceSession(self.openfile_name_model, providers=providers)
        active = str(self.net.get_providers())
        print("执行提供者:", self.net.get_providers())
        print("GPU加速:", "CUDA" in active or "Tensorrt" in active)

        big_img = np.zeros((1080, 1920, 3), dtype=np.uint8)
        self.detect(big_img)  # first call is a warm-up and is not timed
        started = time.perf_counter()
        self.detect(big_img)
        # Clamp to 1 ms so a very fast run (or a coarse clock) cannot divide by zero.
        elapsed_ms = max(int((time.perf_counter() - started) * 1000), 1)
        print("检测帧率:", 1000 // elapsed_ms)

    def _label_for(self, cls):
        """Return the configured name for class id *cls*, or str(cls) when none is configured."""
        try:
            return self.dic_labels[cls]
        except (KeyError, IndexError):
            return str(cls)

    # Draw one detection
    def plot_one_box(self, x, img, color=None, label=None, line_thickness=None):
        """
        description: Plots one bounding box on image img,
                     this function comes from YoLov5 project.
        param:
            x:      a box likes [x1,y1,x2,y2]
            img:    a opencv image object; the rectangle is drawn on it in place
            color:  color to draw rectangle, such as (0,255,0); random when omitted
            label:  str, drawn above the box when given
            line_thickness: int; derived from the image size when omitted
        return:
            img (a new array when a label was drawn, otherwise the input array)
        """
        # line/font thickness
        tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
        color = color or [random.randint(0, 255) for _ in range(3)]
        c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
        cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
        if label:
            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            # Render through PIL so Chinese labels work (cv2.putText cannot
            # draw them). The channel order is reversed because the helper
            # applies the colour to an RGB image. Font size is kept >= 1.
            img = put_chinese_text_on_image(img, label, (c1[0], c1[1] - 2),
                                            [color[2], color[1], color[0]],
                                            font_size=max(int(t_size[1]), 1))
        return img

    # Non-maximum suppression
    def post_process_opencv(self, outputs, img_h, img_w,):
        """
        Scale raw predictions to image coordinates and apply NMS.

        outputs: 2-D array whose columns are read as
                 [cx, cy, w, h, confidence, class scores...] in model-input pixels.
        img_h, img_w: size of the original image the boxes are scaled to.

        Returns (boxes, confidences, class_ids) where boxes are
        [x1, y1, x2, y2] rows, or three empty lists when nothing is kept.
        Only column 4 is used as the score; class scores select the class id.
        """
        # Drop low-confidence rows up front: NMSBoxes would discard them anyway
        # (it keeps scores strictly above the threshold), and this avoids
        # converting every raw prediction to Python lists.
        candidates = outputs[outputs[:, 4] > self.thred_cond]
        if candidates.shape[0] == 0:
            return [], [], []

        conf = candidates[:, 4].astype(np.float64)
        c_x = candidates[:, 0] / self.model_w * img_w
        c_y = candidates[:, 1] / self.model_h * img_h
        w = candidates[:, 2] / self.model_w * img_w
        h = candidates[:, 3] / self.model_h * img_h
        cls_id = np.argmax(candidates[:, 5:], axis=1)

        x1 = c_x - w / 2
        y1 = c_y - h / 2
        boxes_xyxy = np.stack((x1, y1, c_x + w / 2, c_y + h / 2), axis=-1).astype(np.float64)
        # cv2.dnn.NMSBoxes expects rectangles as [x, y, width, height].
        boxes_xywh = np.stack((x1, y1, w, h), axis=-1).astype(np.float64)

        ids = cv2.dnn.NMSBoxes(boxes_xywh.tolist(), conf.tolist(), self.thred_cond, self.thred_nms)
        # Older OpenCV builds return an Nx1 array, newer ones a flat array and
        # an empty tuple when nothing survives; normalise to a flat index array.
        ids = np.asarray(ids).reshape(-1)
        if ids.size == 0:
            return [], [], []
        ids = ids.astype(np.intp)
        return boxes_xyxy[ids], conf[ids], cls_id[ids]

    # Inference
    def detect(self, img_np, save_path=None, plot_box=True):
        '''
        Run the model on one image.

        img_np: OpenCV image (BGR); it is copied, the input is not modified.
        save_path: when given, the annotated image is written there with cv2.imwrite.
        plot_box: draw boxes/labels on the returned image (boxes are also
                  drawn whenever save_path is given).

        Returns (res_loc, image). res_loc is a list of dicts with keys
        "label", "x"/"y" (box centre), "sim" (confidence rounded to 2 decimals)
        and "points" ((x1, y1), (x2, y2)); all coordinates are plain ints.
        '''
        img0 = img_np.copy()
        # Pre-processing: resize, BGR -> RGB, scale to [0, 1], HWC -> NCHW.
        img = cv2.resize(img0, (self.model_w, self.model_h), interpolation=cv2.INTER_AREA)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
        # Model inference; drop the batch dimension of the first output.
        outs = self.net.run(None, {self.net.get_inputs()[0].name: blob})[0].squeeze(axis=0)
        # Box computation in original-image coordinates.
        img_h, img_w = img0.shape[:2]
        boxes, confs, ids = self.post_process_opencv(outs, img_h, img_w)

        draw = plot_box or save_path is not None
        res_loc = []
        for box, score, cls in zip(boxes, confs, ids):
            cls = int(cls)
            name = self._label_for(cls)
            label = '%s:%.2f' % (name, score)
            # int32 avoids the wrap-around int16 would cause on large coordinates.
            x1, y1, x2, y2 = (int(v) for v in box.astype(np.int32))
            if draw:
                # Fall back to a random colour when no colour exists for this id.
                color = self.colors[cls] if cls < len(self.colors) else None
                img0 = self.plot_one_box((x1, y1, x2, y2), img0, label=label, line_thickness=None, color=color)
            res_loc.append({"label": name, "x": x1 + (x2 - x1) // 2,
                            "y": y1 + (y2 - y1) // 2, "sim": float(f"{score:.2f}"),
                            "points": ((x1, y1), (x2, y2))})
        if save_path is not None:
            cv2.imwrite(save_path, img0)
        if img0.ndim == 3 and img0.shape[2] == 4:
            # Return a 3-channel image even for BGRA input.
            return res_loc, cv2.cvtColor(img0, cv2.COLOR_BGRA2BGR)
        return res_loc, img0
if __name__ == '__main__':
    # Demo: load one sample image, build the detector, then time five detections.
    image_path = "D:\\pythonProjects\\yuanshenAI\\datasets\\yuanshen\\images\\17141983915923.jpg"
    weights = "./datas/yolov5l_320.onnx"
    model_h = 320
    model_w = 320
    # Class ids are the positions in this sequence (0..26).
    dic_labels = dict(enumerate((
        "玩家", "采集物", "生命树", "开关", "怪物", "提示关闭", "使用原粹树脂", "使用浓缩树脂",
        "关闭", "继续挑战", "退出秘境", "副本门框", "副本打完了", "捡东西", "副本楼梯",
        "往下落", "矿石", "往上跳", "交互对话", "采集物-风", "采集物-冰", "确定",
        "取消", "返回", "被控了", "在水面", "宠物",
    )))
    # np.fromfile + imdecode is used to load the image from the path.
    big_img = cv2.imdecode(np.fromfile(file=image_path, dtype=np.uint8), cv2.IMREAD_COLOR)

    yolov = LanRenOnnxYolov(weights=weights, model_w=model_w, model_h=model_h, dic_labels=dic_labels)

    # The first run also saves the annotated image; the remaining four only
    # print the elapsed milliseconds and the detections.
    for save_path in ("test.jpg", None, None, None, None):
        old_time = time.time()
        res_loc, img = yolov.detect(big_img, save_path=save_path, plot_box=True)
        print((time.time() - old_time) * 1000, res_loc)