import cv2
import numpy as np
import torch
import math
# --- Model and video I/O setup ---------------------------------------------
# Load the YOLOv5 model from local weights through the Ultralytics hub entry.
model_path = 'yolov5s.pt'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_path)
# `device` was previously computed but never applied; place the model on it.
model.to(device)

# Open the input video and fail early with a clear message if it cannot be
# opened, instead of crashing later when the first frame turns out to be None.
video = cv2.VideoCapture('videos/test4.mp4')
if not video.isOpened():
    raise IOError("Could not open input video 'videos/test4.mp4'")

# Writer for the processed output: mp4v codec, 20 FPS, fixed frame size.
output_filename = 'output_video2.mp4'
width, height = 1280, 720
videoOut = cv2.VideoWriter(output_filename, cv2.VideoWriter_fourcc(*'mp4v'), 20, (width, height))

# Toggles the centroid-tracking code path in the main loop (set True to enable).
track = False
# Names of the 80 object classes, indexed by the integer class id that the
# detector reports for each detection (looked up as yolo_classes[int(class_id)]).
yolo_classes = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
    'hair drier', 'toothbrush',
]
def overlay_transparent(background, foreground, angle, x, y, objSize=50):
    """Alpha-blend a resized, rotated sprite onto ``background`` in place.

    The sprite is resized to ``objSize`` x ``objSize`` pixels, rotated by
    ``angle`` degrees about its centre, and blended into ``background`` using
    the sprite's alpha channel. Parts of the sprite that fall outside the
    background are clipped.

    Args:
        background: 3-channel image array to draw on; modified in place.
        foreground: Sprite image with 3 or 4 channels. A fourth channel is
            interpreted as alpha in the range 0-255; a 3-channel sprite is
            treated as fully opaque.
        angle: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.
        x: ROW offset in ``background`` of the sprite's top-left corner.
            NOTE: despite the name, this indexes axis 0.
        y: COLUMN offset in ``background`` of the sprite's top-left corner.
        objSize: Edge length in pixels the sprite is resized to.

    Returns:
        The same ``background`` array that was passed in.
    """
    foreground = cv2.resize(foreground, (objSize, objSize))
    rows, cols, channels = foreground.shape

    # Rotate the sprite about its own centre, keeping the canvas size.
    rotation = cv2.getRotationMatrix2D((int(cols / 2), int(rows / 2)), angle, 1)
    foreground = cv2.warpAffine(foreground, rotation, (cols, rows))

    # Clip the sprite rectangle to the background so that offsets near (or
    # beyond) any edge, including negative ones, never index out of range or
    # wrap around through negative indexing.
    top, left = max(x, 0), max(y, 0)
    bottom = min(x + rows, background.shape[0])
    right = min(y + cols, background.shape[1])
    if bottom <= top or right <= left:
        return background

    sprite = foreground[top - x:bottom - x, left - y:right - y]
    region = background[top:bottom, left:right]

    if channels >= 4:
        # Per-pixel alpha in [0, 1], kept as (h, w, 1) to broadcast over BGR.
        alpha = sprite[:, :, 3:4] / 255.0
        blended = alpha * sprite[:, :, :3] + (1 - alpha) * region
    else:
        # No alpha channel available: the sprite simply replaces the region.
        blended = sprite[:, :, :3]

    # Cast back to the background dtype (truncating, as element-wise
    # assignment into the integer image did before).
    background[top:bottom, left:right] = blended.astype(background.dtype)
    return background
# Sprites keyed by class name so each asset is read from disk only once rather
# than once per object per frame. A missing asset is cached as None.
_OBJECT_SPRITE_CACHE = {}


def simulate_object(background, object_class, x, y):
    """Draw the sprite for ``object_class`` onto ``background`` at (x, y).

    The sprite is loaded from ``assets/<object_class>.png`` (with its alpha
    channel, if any) and blended into the 100x100 region of ``background``
    whose top-left corner is at column ``x``, row ``y``.

    Args:
        background: Image array to draw on; modified in place.
        object_class: Class name used to build the asset file name.
        x: Column of the region's top-left corner.
        y: Row of the region's top-left corner.

    Returns:
        ``background``; unchanged if no asset exists for the class or the
        position has a negative coordinate.
    """
    if object_class not in _OBJECT_SPRITE_CACHE:
        _OBJECT_SPRITE_CACHE[object_class] = cv2.imread(
            f'assets/{object_class}.png', cv2.IMREAD_UNCHANGED
        )
    object_img = _OBJECT_SPRITE_CACHE[object_class]
    if object_img is None:
        return background

    # A negative start index would make the slice below wrap around and draw
    # the sprite near the opposite edge of the image, so skip such positions.
    if x < 0 or y < 0:
        return background

    # Slices past the bottom/right edge are shortened by NumPy, and
    # overlay_transparent clips the sprite to whatever region it receives.
    region = background[y:y + 100, x:x + 100]
    background[y:y + 100, x:x + 100] = overlay_transparent(region, object_img, 0, 0, 0)
    return background
def add_myCar_overlay(background):
    """Draw the ego-car sprite (``assets/MyCar.png``) onto ``background``.

    The sprite is drawn at 250x250 pixels with its top-left corner at column
    550 and 200 rows above the bottom edge of ``background``; whatever extends
    past the image border is clipped.

    Args:
        background: Image array to draw on; modified in place.

    Returns:
        ``background``; unchanged if the sprite file cannot be read.
    """
    car_size = 250
    overlay_img = cv2.imread('assets/MyCar.png', cv2.IMREAD_UNCHANGED)
    if overlay_img is None:
        # Same policy as simulate_object: a missing asset is not fatal.
        return background

    x = 550
    # Clamp so a background shorter than 200 rows cannot yield a negative
    # (wrapping) start index.
    y = max(background.shape[0] - 200, 0)

    # Size the target region from the sprite as drawn (car_size), not from the
    # unresized asset, so a small source image is not cropped after upscaling.
    region = background[y:y + car_size, x:x + car_size]
    background[y:y + car_size, x:x + car_size] = overlay_transparent(
        region, overlay_img, 0, 0, 0, objSize=car_size
    )
    return background
def plot_object_bev(transformed_image_with_centroids, src_points ,dst_points , objs_):
    """Project object centroids through a perspective transform and plot them.

    A perspective matrix is computed from ``src_points`` to ``dst_points``.
    For every non-empty entry of ``objs_`` (element 0 is an (x, y) centroid,
    element 1 a class label) the centroid is transformed, marked on
    ``transformed_image_with_centroids`` with a filled green dot, a white
    ring and a "Class: <label>" caption.

    Args:
        transformed_image_with_centroids: Image array drawn on in place.
        src_points: Four source points for ``cv2.getPerspectiveTransform``.
        dst_points: Four destination points for the same call.
        objs_: Iterable of ``[centroid, label]`` entries; falsy entries are
            skipped.

    Returns:
        A tuple ``(image, projected)`` where ``image`` is the array that was
        drawn on and ``projected`` is a list of
        ``[transformed_centroid, label]`` pairs.
    """
    canvas = transformed_image_with_centroids
    homography = cv2.getPerspectiveTransform(src_points, dst_points)
    green = (0, 255, 0)
    white = (255, 255, 255)

    projected = []
    for entry in objs_:
        if not entry:
            continue

        label = entry[1]

        # perspectiveTransform expects points shaped (N, 1, 2) in float32.
        point = np.array([list(entry[0])], dtype=np.float32).reshape(-1, 1, 2)
        warped = cv2.perspectiveTransform(point, homography)
        bev_xy = tuple(warped[0][0].astype(int))

        # Filled dot at the projected centroid, with a thin ring around it.
        cv2.circle(canvas, bev_xy, radius=3, color=green, thickness=-1)
        cv2.circle(canvas, bev_xy, radius=12, color=white, thickness=1)

        # Caption placed slightly to the right of the marker.
        caption = f"Class: {label}"
        caption_origin = (bev_xy[0] + 10, bev_xy[1])
        cv2.putText(canvas, caption, caption_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 1)

        projected.append([bev_xy, label])

    return canvas, projected
# --- State carried across iterations of the frame-processing loop ---
# Number of frames read so far; incremented each time a frame is read.
frame_count = 0
# Centroid entries ([(x, y), class_name]) collected for the previous frame.
centroid_prev_frame = []
# Maps a tracking id to a (centroid, class_name) pair for each tracked object.
tracking_objects = {}
# Next id to hand out when a new object starts being tracked.
tracking_id = 0
# Process each frame of the video
while True:
# Read the next frame
success, frame = video.read()
frame = cv2.resize(frame, (width, height))
frame_count += 1
if not success:
# Perform object detection on the frame
results = model(frame, size=320)
detections = results.pred[0]
# Create a black image with the same size as the video frames
image_ = np.zeros((height, width, 3), dtype=np.uint8)
simulated_image = image_.copy()
transformed_image_with_centroids = image_.copy()
transformed_image_to_sim = image_.copy()
simObjs = image_.copy()
objs = []
centroid_curr_frame = []
for detection in detections:
xmin = detection[0]
ymin = detection[1]
xmax = detection[2]
ymax = detection[3]
score = detection[4]
class_id= detection[5]
centroid_x = int(xmin + xmax) // 2
centroid_y = int(ymin + ymax) // 2
if int(class_id) in [0, 1, 2, 3, 5, 7] and score >= 0.3:
# Draw bounding box on the frame
color = (0, 0, 255)
object_label = f"{class_id}: {score:.2f}"
cv2.rectangle(frame, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, 2)
cv2.putText(frame, object_label, (int(xmin), int(ymin) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 1)
centroid_curr_frame.append([(centroid_x, centroid_y), yolo_classes[int(class_id)]])
if track:
objs.append([(centroid_x, centroid_y), yolo_classes[int(class_id)]])
if track:
if frame_count <= 2:
for pt1, class_id in centroid_curr_frame:
for pt2, class_id in centroid_prev_frame:
dist = math.hypot(pt2[0] - pt1[0], pt2[1] - pt1[1])
if dist < 50:
tracking_objects[tracking_id] = pt1, class_id
tracking_id += 1
tracking_objects_copy = tracking_objects.copy()
for obj_id, pt2 in tracking_objects_copy.items():
objects_exists = False
for pt1, class_id in centroid_curr_frame:
dist = math.hypot(pt2[0][0] - pt1[0], pt2[0][1] - pt1[1])
if dist < 20:
tracking_objects[obj_id] = pt1, class_id
objects_exists = True
if not objects_exists:
for obj_id, pt1 in tracking_objects.items():
cv2.circle(frame, pt1[0], 3, (0, 255, 255), -1)
# cv2.putText(frame, str(obj_id)+' '+str(pt1[1]), pt1[0], cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 1)
if track:
objs.append([pt1[0], pt1[1]])
centroid_prev_frame = centroid_curr_frame.copy()
## BEV ##
# Define the source points (region of interest) in the original image
x1, y1 = 10, 720 # Top-left point
x2, y2 = 530, 400 # Top-right point
x3, y3 = 840, 400 # Bottom-right point
x4, y4 = 1270, 720 # Bottom-left point
src_points = np.float32([(x1, y1), (x2, y2), (x3, y3), (x4, y4)])
# Draw the source points on the image (in red)
# cv2.polylines(frame, [src_points.astype(int)], isClosed=True, color=(0, 0, 255), thickness=2)
# # Define the destination points (desired output perspective)
u1, v1 = 370, 720 # Top-left point
u2, v2 = 0+150, 0 # Top-right point
u3, v3 = 1280-150, 0 # Bottom-right point
u4, v4 = 900, 720 # Bottom-left point
dst_points = np.float32([[u1, v1], [u2, v2], [u3, v3], [u4, v4]])
# # Draw the destination points on the image (in blue)
# cv2.polylines(frame, [dst_points.astype(int)], isClosed=True, color=(255, 0, 0), thickness=2)
# perspectivs plot and objs
transformed_image_with_centroids, persObjs_ = plot_object_bev(transformed_image_with_centroids, src_points ,dst_points , objs)
### plot objs overlays
for persObj_ in persObjs_:
simObjs = simulate_object(transformed_image_to_sim, persObj_[1], persObj_[0][0], persObj_[0][1])
# Add the car_img overlay to the simulated image
simulated_image = add_myCar_overlay(simObjs)
# Display the simulated image and frame
cv2.imshow("Video", frame)
cv2.imshow("Simulated Objects", simulated_image)
cv2.imshow('Transformed Frame', transformed_image_with_centroids)
# cv2.imwrite('test.jpg', simulated_image)
if cv2.waitKey(1) & 0xFF == ord('q'):
# Release the video capture object
