-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_video
executable file
·240 lines (192 loc) · 10.7 KB
/
create_video
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#!/usr/bin/env python3
import os
import sys
import argparse
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from pydub import AudioSegment
from moviepy.editor import ImageClip, AudioFileClip, VideoClip, CompositeVideoClip, VideoFileClip
from moviepy.video.io.bindings import mplfig_to_npimage
from PIL import Image, ImageEnhance
def load_audio(wav_file):
    """Read a WAV file into a pydub ``AudioSegment``.

    Args:
        wav_file: Path (or file-like object) of the WAV audio to load.

    Returns:
        The ``AudioSegment`` produced by ``AudioSegment.from_wav``.
    """
    segment = AudioSegment.from_wav(wav_file)
    return segment
def multiply_blend(background, overlay):
    """Apply the multiply blend mode to two images.

    Every channel value is treated as a fraction of 255 and the two fractions
    are multiplied. White (255) in one layer leaves the other layer unchanged
    and black (0) forces the result to black.

    Args:
        background: Bottom layer; anything ``np.asarray`` accepts (PIL image,
            array, nested list) holding values in the 0-255 range.
        overlay: Top layer with the same value range. Its shape must match,
            or be broadcastable against, ``background``.

    Returns:
        A ``uint8`` NumPy array containing the blended image.
    """
    base = np.asarray(background).astype(np.float32) / 255.0
    top = np.asarray(overlay).astype(np.float32) / 255.0

    blended = base * top

    # Round to the nearest level instead of truncating: a bare uint8 cast
    # always rounds down, which darkens the result by up to one level.
    # Clipping keeps out-of-range inputs from wrapping around in the cast.
    return np.clip(np.rint(blended * 255.0), 0, 255).astype(np.uint8)
def overlay_blend(background, overlay):
    """Apply the overlay blend mode to two images.

    Overlay multiplies where the *background* is dark and screens where the
    background is light, so background shadows and highlights are preserved
    while the overlay tints the mid-tones.

    Args:
        background: Bottom layer (backdrop); anything ``np.asarray`` accepts
            holding values in the 0-255 range.
        overlay: Top layer with the same value range. Its shape must match,
            or be broadcastable against, ``background``.

    Returns:
        A ``uint8`` NumPy array containing the blended image.
    """
    base = np.asarray(background).astype(np.float32) / 255.0
    top = np.asarray(overlay).astype(np.float32) / 255.0

    multiplied = 2.0 * base * top
    screened = 1.0 - 2.0 * (1.0 - base) * (1.0 - top)

    # The branch is selected by the backdrop value. Selecting on the top
    # layer instead yields the hard-light mode (overlay with layers swapped).
    blended = np.where(base < 0.5, multiplied, screened)

    # Round to the nearest level instead of truncating, and clip so that
    # out-of-range inputs cannot wrap around in the uint8 cast.
    return np.clip(np.rint(blended * 255.0), 0, 255).astype(np.uint8)
def create_waveform_clip(audio, media_file, fps=30, duration=10, frame_size=(1280, 720), waveform_height=100, waveform_color='blue', glow_color='blue', overlay_image=None, bw=True, swap_layers=False):
    """Build a video clip that draws an animated waveform over a background.

    The background is either a looping video (``.webm``, ``.mp4``, ``.mov``,
    ``.avi``) or a still image. For video backgrounds an optional overlay
    image is blended with every frame; the overlay is not applied to still
    image backgrounds. Each frame shows the slice of audio samples that
    belongs to that frame, drawn as a glowing line centred vertically.

    Args:
        audio: Object exposing ``get_array_of_samples()`` (a pydub
            ``AudioSegment`` in this script).
        media_file: Path of the background image or video.
        fps: Frames per second used to slice the audio per frame.
        duration: Clip length in seconds.
        frame_size: ``(width, height)`` of the rendered frames in pixels.
        waveform_height: Peak-to-peak height of the waveform in pixels.
        waveform_color: Matplotlib colour of the main waveform line.
        glow_color: Matplotlib colour of the glow layers.
        overlay_image: Optional path of an image blended with video frames.
        bw: Convert the bottom blend layer to greyscale before blending.
        swap_layers: Use the overlay image as the bottom layer and the video
            frame as the top layer.

    Returns:
        A moviepy ``VideoClip`` whose frames are rendered lazily.
    """
    width, height = frame_size
    video_extensions = ('.webm', '.mp4', '.mov', '.avi')
    render_dpi = 100
    base_glow_widths = (20, 18, 16, 14, 12, 10)  # Base line widths
    base_glow_alphas = (0.05, 0.1, 0.15, 0.2, 0.25, 0.3)  # Base alpha values

    # Work in float from the start: abs() on a signed integer array overflows
    # for the most negative sample value and would understate the peak.
    samples = np.array(audio.get_array_of_samples()).astype(np.float64)
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    samples = samples / (peak + 1e-10)  # Epsilon avoids 0/0 on silence

    samples_per_frame = int(len(samples) / (fps * duration))

    # Decide between a video and a still background by file extension.
    is_video = os.path.splitext(media_file)[1].lower() in video_extensions
    background_clip = None
    img = None
    if is_video:
        background_clip = VideoFileClip(media_file).loop(duration=duration)
    else:
        with Image.open(media_file) as source:
            # imshow() colour-maps 2-D data, so greyscale/palette images must
            # be expanded to RGB to keep their real colours.
            still = source if source.mode in ('RGB', 'RGBA') else source.convert('RGB')
            img = np.array(still.resize((width, height), Image.LANCZOS))

    overlay_img = None
    if overlay_image:
        with Image.open(overlay_image) as source:
            # Force RGB so the overlay always has the same channel layout as
            # the RGB video frames it is blended with.
            overlay_img = source.convert('RGB').resize((width, height), Image.LANCZOS)
    resized_overlays = {}  # Overlay resized to a frame size, computed once

    # figsize (inches) * dpi must equal frame_size so the rendered frames
    # have exactly the requested pixel dimensions. The figure stays open on
    # purpose: make_frame draws into it whenever moviepy requests a frame.
    fig, ax = plt.subplots(figsize=(width / render_dpi, height / render_dpi), dpi=render_dpi)

    def overlay_for(size):
        """Return the overlay image resized to ``size``, resizing only once."""
        if size not in resized_overlays:
            resized_overlays[size] = overlay_img.resize(size, Image.LANCZOS)
        return resized_overlays[size]

    def make_frame(t, glow_scale=1.0):
        """Render the frame for time ``t`` (seconds) as an RGB array."""
        # Slice of audio samples that belongs to this frame
        start_idx = int(t * samples_per_frame * fps)
        end_idx = min(len(samples), start_idx + samples_per_frame)
        frame_samples = samples[start_idx:end_idx]

        ax.clear()

        if is_video:
            background = background_clip.get_frame(t)
            if overlay_img is not None:
                frame_pil = Image.fromarray(background)
                fg = overlay_for(frame_pil.size)
                if swap_layers:
                    bg, fg = fg, frame_pil
                else:
                    bg = frame_pil
                if bw:
                    bg = bg.convert('L').convert('RGB')
                # main() publishes the selected blend function as a module
                # global; fall back to multiply (the CLI default) when this
                # function is used without going through main().
                blend = globals().get('blend_mode_function', multiply_blend)
                background = np.array(blend(bg, fg))
        else:
            background = img

        # Draw the background stretched over the whole canvas
        ax.imshow(background, interpolation='none', origin='upper', extent=[0, width, 0, height], aspect='auto', zorder=0)
        ax.autoscale(False)

        # Scale the waveform and centre it vertically in the frame
        waveform_center = height / 2
        scaled_samples = frame_samples * (waveform_height / 2)
        x_values = np.linspace(0, width, len(scaled_samples))
        y_values = scaled_samples + waveform_center

        # Glow: stacked translucent lines, from wide/faint to narrow/strong;
        # alpha is capped at 1 because glow_scale may push it past the limit.
        for base_width, base_alpha in zip(base_glow_widths, base_glow_alphas):
            ax.plot(x_values, y_values, lw=base_width * glow_scale, color=glow_color, alpha=min(base_alpha * glow_scale, 1), zorder=2)

        # Main waveform on top of the glow
        ax.plot(x_values, y_values, lw=2, color=waveform_color, zorder=3)

        # ax.clear() restores the axis decorations, so strip them every frame
        # and make the axes fill the figure without padding.
        ax.set_axis_off()
        ax.set_aspect(aspect='auto')
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        ax.margins(0)
        ax.set_xlim([0, width])
        fig.tight_layout(pad=0)

        return mplfig_to_npimage(fig)

    video_clip = VideoClip(make_frame, duration=duration)
    print("Video clip created successfully.")
    return video_clip
def create_mp4_from_wav_with_waveform(wav_file, image_file, output_file, fps=30, frame_size=(1280, 720), waveform_height=100, waveform_color='blue', glow_color='blue', overlay_image=None, bw=True, swap_layers=False):
    """Render a WAV file and a background into an MP4 with a waveform.

    The WAV file is loaded twice: once through ``load_audio`` to obtain the
    raw samples that drive the waveform, and once as a moviepy
    ``AudioFileClip`` that becomes the sound track. The clip length equals
    the audio duration.

    Args:
        wav_file: Path of the WAV audio file.
        image_file: Path of the background image or video.
        output_file: Path of the MP4 file to write.
        fps: Frames per second of the output video.
        frame_size: ``(width, height)`` passed to ``create_waveform_clip``.
        waveform_height: Height of the waveform in pixels.
        waveform_color: Colour of the waveform line.
        glow_color: Colour of the glow around the waveform.
        overlay_image: Optional image blended with the background.
        bw: Convert the bottom blend layer to greyscale before blending.
        swap_layers: Swap the two blend layers.
    """
    audio = load_audio(wav_file)
    audio_clip = AudioFileClip(wav_file)
    try:
        # Waveform animation synchronised with the audio
        waveform_clip = create_waveform_clip(
            audio,
            image_file,
            fps=fps,
            duration=audio_clip.duration,
            frame_size=frame_size,
            waveform_height=waveform_height,
            waveform_color=waveform_color,
            glow_color=glow_color,
            overlay_image=overlay_image,
            bw=bw,
            swap_layers=swap_layers,
        )
        final_clip = waveform_clip.set_audio(audio_clip)
        # H.264 video with an AAC sound track
        final_clip.write_videofile(output_file, fps=fps, audio_codec='aac', codec="libx264")
    finally:
        # Release the audio reader even when rendering fails.
        audio_clip.close()
def parse_arguments(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding the parsed options. Note that
        ``disable_bw`` is ``True`` by default and becomes ``False`` when the
        ``--disable_bw`` flag is given.
    """
    def positive_int(text):
        """Convert ``text`` to an int and reject values below 1."""
        value = int(text)
        if value <= 0:
            raise argparse.ArgumentTypeError(f"expected a positive integer, got {text!r}")
        return value

    parser = argparse.ArgumentParser(description='Create an MP4 video with an animated waveform from a WAV file and an image or video background.')
    parser.add_argument('image_file', help='The image or video file to use as the background.')
    parser.add_argument('audio_file', help='The audio WAV file to generate the waveform from.')
    parser.add_argument('output_file', help='The output MP4 file.')
    # A frame rate of zero or less cannot be rendered, so reject it up front.
    parser.add_argument('--fps', type=positive_int, default=30, help='Frames per second for the output video (default: 30).')
    parser.add_argument('--frame_size', type=str, default='1280x720', help='Frame size for the video as WIDTHxHEIGHT (default: 1280x720).')
    parser.add_argument('--waveform_height', type=int, default=100, help='Height of the waveform in pixels (default: 100).')
    parser.add_argument('--waveform_color', type=str, default='blue', help='Color of the waveform (default: blue).')
    parser.add_argument('--glow_color', type=str, default='blue', help='Color of the glow around the waveform (default: blue).')
    parser.add_argument('--overlay_image', help='Optional overlay image to blend with the background.')
    parser.add_argument(
        '--blend_mode',
        default='multiply',
        choices=['multiply', 'overlay'],  # Add valid options here
        help='The blend mode to use (e.g., multiply, overlay). Default is "multiply".',
    )
    # store_false: args.disable_bw defaults to True and turns False when the
    # flag is present, i.e. the attribute holds "black & white enabled".
    parser.add_argument(
        '--disable_bw',
        action='store_false',
        help='Disable black & white conversion before applying the overlay.',
    )
    parser.add_argument(
        '--swap_overlay_layers',
        action='store_true',
        help='The still image overlays the video frame by default.',
    )
    return parser.parse_args(argv)
def main():
    """Command-line entry point: parse options and render the video.

    Resolves the ``<blend_mode>_blend`` function of this module and publishes
    it as the module global ``blend_mode_function`` (read while frames are
    rendered), validates ``--frame_size`` and then calls
    ``create_mp4_from_wav_with_waveform``. Exits with status 1 when the blend
    function cannot be found or the frame size is malformed.
    """
    global blend_mode_function

    args = parse_arguments()

    # Dynamically resolve the blend mode function
    blend_mode_function_name = f"{args.blend_mode}_blend"
    try:
        blend_mode_function = getattr(sys.modules[__name__], blend_mode_function_name)
    except AttributeError:
        print(f"Error: Blend mode function '{blend_mode_function_name}' not found.", file=sys.stderr)
        sys.exit(1)

    # Parse the frame size from its WIDTHxHEIGHT string form
    try:
        width, height = map(int, args.frame_size.lower().split('x'))
        if width <= 0 or height <= 0:
            raise ValueError("frame dimensions must be positive")
    except ValueError:
        print(f"Invalid frame size format: {args.frame_size}. Expected format is WIDTHxHEIGHT.", file=sys.stderr)
        sys.exit(1)
    frame_size = (width, height)

    create_mp4_from_wav_with_waveform(
        wav_file=args.audio_file,
        image_file=args.image_file,
        output_file=args.output_file,
        fps=args.fps,
        frame_size=frame_size,
        waveform_height=args.waveform_height,
        waveform_color=args.waveform_color,
        glow_color=args.glow_color,
        overlay_image=args.overlay_image,
        # args.disable_bw is False only when --disable_bw was given; the
        # switch is honoured only together with an overlay image.
        bw=args.disable_bw if args.overlay_image is not None else True,
        swap_layers=args.swap_overlay_layers,
    )
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()