# videoSynopsis.py
import ast
import csv
import os

import cv2
import numpy as np


def generate_video_synopsis(bg_image_path, csv_file, crops_dir, output_dir,
                            video_output=None, fps=30, max_objects_per_frame=5):
    """
    Generate a video synopsis by blending objects from different times into a single frame.

    Args:
        bg_image_path (str): Path to the background image for the video synopsis.
        csv_file (str): Path to the CSV file containing object tracking data.
        crops_dir (str): Directory containing cropped object images.
        output_dir (str): Directory to save the output frames.
        video_output (str): Path to save the output video file (optional).
        fps (int): Frames per second for the output video.
        max_objects_per_frame (int): Maximum number of objects to display in each frame.

    Returns:
        None
    """
    # Load the background image
    bg_image = cv2.imread(bg_image_path)
    if bg_image is None:
        raise ValueError(f"Background image {bg_image_path} not found.")

    # Get dimensions of the background
    frame_height, frame_width, _ = bg_image.shape

    # Initialize the video writer only if a video output path was given
    video_writer = None
    if video_output:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(video_output, fourcc, fps, (frame_width, frame_height))

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    # Load tracking data from the CSV file and group it per source frame
    frame_dict = {}
    with open(csv_file, mode='r', newline='') as file:
        csv_reader = csv.DictReader(file)
        for row in csv_reader:
            frame_num = int(row['Frame'])
            track_id = row['Track ID']
            class_name = row['Class Name']
            # Parse the bounding box safely; eval() on CSV input would be unsafe
            bbox = ast.literal_eval(row['Bounding Box'])  # Format: (x1, y1, x2, y2)
            crop_path = os.path.join(crops_dir, row['Crop Path'])
            # Organize data per frame
            frame_dict.setdefault(frame_num, []).append((track_id, class_name, bbox, crop_path))
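
    # The CSV schema is inferred from the columns accessed above. A compatible
    # file would start like this (the row values are hypothetical):
    #
    #   Frame,Track ID,Class Name,Bounding Box,Crop Path
    #   17,3,person,"(120, 80, 260, 400)",track3/frame17.jpg
    #
    # 'Bounding Box' must parse via ast.literal_eval as an (x1, y1, x2, y2)
    # tuple, and 'Crop Path' is joined onto crops_dir.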

    # Reorganize per track: track ID -> list of (frame number, bbox, crop path)
    track_data = {}
    for frame_num in sorted(frame_dict.keys()):
        for track_id, class_name, bbox, crop_path in frame_dict[frame_num]:
            track_data.setdefault(track_id, []).append((frame_num, bbox, crop_path))

    # Build the synopsis: each output frame blends objects from different times
    # over the shared background until every track has been shown
    current_frame_num = 0
    used_tracks = set()
    while len(used_tracks) < len(track_data):
        # Create a new frame by cloning the background
        frame_image = bg_image.copy()

        # Select up to max_objects_per_frame unused tracks for this frame
        objects_in_frame = 0
        for track_id, track_frames in track_data.items():
            if track_id in used_tracks:
                continue
            if objects_in_frame >= max_objects_per_frame:
                break  # Max number of objects reached for this frame

            # Blend every stored crop of this track into the frame
            for frame_num, bbox, crop_path in track_frames:
                x1, y1, x2, y2 = bbox
                if x2 <= x1 or y2 <= y1:
                    continue  # Skip invalid bounding boxes

                # Load the cropped object image
                crop_img = cv2.imread(crop_path)
                if crop_img is None:
                    continue
                crop_height, crop_width, _ = crop_img.shape

                # Center the crop on the bounding-box centroid, clamped to the frame
                centroid_x = (x1 + x2) // 2
                centroid_y = (y1 + y2) // 2
                top_left_x = int(centroid_x - (crop_width // 2))
                top_left_y = int(centroid_y - (crop_height // 2))
                top_left_x = max(0, min(top_left_x, frame_width - crop_width))
                top_left_y = max(0, min(top_left_y, frame_height - crop_height))

                try:
                    # Mask out the crop's black background and paste it over the ROI
                    roi = frame_image[top_left_y:top_left_y + crop_height,
                                      top_left_x:top_left_x + crop_width]
                    crop_gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
                    _, mask = cv2.threshold(crop_gray, 1, 255, cv2.THRESH_BINARY)
                    mask_inv = cv2.bitwise_not(mask)
                    bg_part = cv2.bitwise_and(roi, roi, mask=mask_inv)
                    fg_part = cv2.bitwise_and(crop_img, crop_img, mask=mask)
                    dst = cv2.add(bg_part, fg_part)
                    frame_image[top_left_y:top_left_y + crop_height,
                                top_left_x:top_left_x + crop_width] = dst
                except Exception as e:
                    print(f"Error blending object in synopsis frame {current_frame_num}, "
                          f"track {track_id}: {e}")

            # Mark the track as shown even if none of its crops could be blended;
            # otherwise a track with missing crops would make this loop run forever
            used_tracks.add(track_id)
            objects_in_frame += 1

        # Save the frame as an image and optionally append it to the video
        cv2.imwrite(os.path.join(output_dir, f"synopsis_{current_frame_num:05d}.jpg"), frame_image)
        if video_writer is not None:
            video_writer.write(frame_image)
        current_frame_num += 1

    # Release the video writer if it was used
    if video_writer is not None:
        video_writer.release()

    print(f"Video synopsis complete. Frames saved to {output_dir}.")
    if video_output:
        print(f"Video saved to {video_output}.")


if __name__ == "__main__":
    # Example usage
    bg_image_path = "background.jpg"
    csv_file = "optimized_person_tracks.csv"
    crops_dir = ""  # Crop paths in the CSV are used as-is when this is empty
    output_dir = "synopsis_frames"
    video_output = "synopsis_video.mp4"  # Set to None if you don't need a video output

    generate_video_synopsis(bg_image_path, csv_file, crops_dir, output_dir,
                            video_output=video_output, fps=30, max_objects_per_frame=4)
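
# Note: the binary-mask paste in generate_video_synopsis() makes later objects
# fully occlude earlier ones where crops overlap. If translucent overlap is
# preferred, one option (a sketch, not this script's behavior) is to mix the
# ROI and the crop instead of masking:
#
#   dst = cv2.addWeighted(roi, 0.5, crop_img, 0.5, 0)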