# create_video

#!/usr/bin/env python3

import os
import sys
import argparse
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from pydub import AudioSegment
from moviepy.editor import ImageClip, AudioFileClip, VideoClip, CompositeVideoClip, VideoFileClip
from moviepy.video.io.bindings import mplfig_to_npimage
from PIL import Image, ImageEnhance

# Load the audio file
def load_audio(wav_file):
    """Read the WAV file at ``wav_file`` and return it as a pydub ``AudioSegment``."""
    segment = AudioSegment.from_wav(wav_file)
    return segment

def multiply_blend(background, overlay):
    """Combine two images with the multiply blend mode.

    Each input is converted to a float32 array scaled to the 0-1 range, the
    two are multiplied element-wise, and the product is scaled back to 0-255
    and truncated to ``uint8``.
    """
    # Normalise both layers the same way before combining them.
    base, top = (
        np.asarray(layer).astype(np.float32) / 255.0
        for layer in (background, overlay)
    )

    product = np.multiply(base, top)

    # Back to 8-bit pixel values.
    return (product * 255).astype(np.uint8)

def overlay_blend(background, overlay):
    """Combine two images with the overlay blend mode.

    Overlay multiplies where the *background* is dark and screens where the
    background is light, so the background's shadows and highlights are
    preserved while the overlay tints it.

    Args:
        background: Bottom layer (array-like or PIL image) with 0-255 values.
        overlay: Top layer with the same shape as ``background``.

    Returns:
        A ``uint8`` numpy array holding the blended image.
    """
    base = np.asarray(background).astype(np.float32) / 255.0
    top = np.asarray(overlay).astype(np.float32) / 255.0

    # The branch is selected by the background (base) layer. Selecting on the
    # top layer instead yields hard-light, i.e. overlay with the layers swapped.
    result = np.where(base < 0.5,
                      2 * base * top,
                      1 - 2 * (1 - base) * (1 - top))

    # Guard against out-of-range inputs wrapping around in the uint8 cast.
    result = np.clip(result, 0.0, 1.0)

    # Rescale back to 0-255 and convert to uint8
    return (result * 255).astype(np.uint8)

def create_waveform_clip(audio, media_file, fps=30, duration=10, frame_size=(1280, 720), waveform_height=100, waveform_color='blue', glow_color='blue', overlay_image=None, bw=True, swap_layers=False):
    """Build a silent clip that draws the audio waveform over a background.

    Args:
        audio: pydub ``AudioSegment`` whose samples are plotted.
        media_file: Path to the background. Files ending in .webm, .mp4, .mov
            or .avi are loaded as a looping video; anything else is opened as
            a still image.
        fps: Frame rate used to slice the samples into per-frame windows.
        duration: Length of the clip in seconds.
        frame_size: ``(width, height)`` of the rendered frames in pixels.
        waveform_height: Peak-to-peak height of the waveform in pixels.
        waveform_color: Matplotlib colour of the main waveform line.
        glow_color: Matplotlib colour of the translucent glow layers.
        overlay_image: Optional image path blended with every video frame.
            It is not applied to still-image backgrounds.
        bw: Convert the bottom blend layer to greyscale before blending.
        swap_layers: Use the overlay image as the bottom layer and the video
            frame as the top layer.

    Returns:
        A moviepy ``VideoClip`` of length ``duration`` without audio.

    The blend function is the module-level ``blend_mode_function`` when one
    has been set (``main`` does this), otherwise ``multiply_blend``.
    """
    frame_size = tuple(frame_size)

    # Work in float: abs(-32768) overflows when the samples are still int16.
    samples = np.array(audio.get_array_of_samples(), dtype=np.float64)

    # pydub interleaves channels (L, R, L, R, ...). Average them into one
    # mono signal so the plot is not a zig-zag between channels.
    channels = getattr(audio, 'channels', 1) or 1
    if channels > 1:
        usable = len(samples) - len(samples) % channels
        samples = samples[:usable].reshape(-1, channels).mean(axis=1)

    peak = np.max(np.abs(samples)) if samples.size else 0.0
    samples = samples / (peak + 1e-10)  # Normalize with epsilon

    # Get the number of samples per frame
    samples_per_frame = int(len(samples) / (fps * duration))

    # Check if media_file is a video or image by checking the file extension
    ext = os.path.splitext(media_file)[1].lower()
    is_video = ext in ('.webm', '.mp4', '.mov', '.avi')
    if is_video:
        # If it's a video, load and loop it
        background_clip = VideoFileClip(media_file).loop(duration=duration)
        img = None
    else:
        # If it's an image, load it as a still frame at the output size
        background_clip = None
        with Image.open(media_file) as source:
            img = np.array(source.resize(frame_size, Image.LANCZOS))

    # Load the overlay image if provided. Force RGB so its array shape always
    # matches the RGB video frames handed to the blend function.
    overlay_rgb = None
    if overlay_image:
        with Image.open(overlay_image) as source:
            overlay_rgb = source.convert('RGB')

    # The overlay only has to be resized once per distinct video frame size.
    resized_overlays = {}

    # At 100 dpi a figure of frame_size / 100 inches renders exactly
    # frame_size pixels, so the clip has the requested resolution.
    dpi = 100
    fig, ax = plt.subplots(figsize=(frame_size[0] / dpi, frame_size[1] / dpi), dpi=dpi)

    def make_frame(t, glow_scale=1.0):
        # Get the correct slice of audio samples for the current frame
        start_idx = int(t * samples_per_frame * fps)
        end_idx = min(len(samples), start_idx + samples_per_frame)
        frame_samples = samples[start_idx:end_idx]

        # Clear the axis for the new frame
        ax.clear()

        if is_video:
            frame = background_clip.get_frame(t)

            # Blend the video frame with the overlay image if provided
            if overlay_rgb is not None:
                frame_pil = Image.fromarray(frame)
                fg = resized_overlays.get(frame_pil.size)
                if fg is None:
                    fg = overlay_rgb.resize(frame_pil.size, Image.LANCZOS)
                    resized_overlays[frame_pil.size] = fg
                if swap_layers:
                    bg, fg = fg, frame_pil
                else:
                    bg = frame_pil
                if bw:
                    bg = bg.convert('L').convert('RGB')
                # Resolved lazily so the function also works when main() has
                # not installed a blend mode.
                blend = globals().get('blend_mode_function', multiply_blend)
                frame = np.array(blend(bg, fg))
            background = frame
        else:
            background = img

        # Draw the background stretched over the whole canvas, behind the waveform
        ax.imshow(background, interpolation='none', origin='upper', extent=[0, frame_size[0], 0, frame_size[1]], aspect='auto', zorder=0)
        ax.autoscale(False)

        # Scale the waveform and center it vertically in the frame
        waveform_center = frame_size[1] / 2
        scaled_samples = frame_samples * (waveform_height / 2)
        x_values = np.linspace(0, frame_size[0], len(scaled_samples))
        y_values = scaled_samples + waveform_center

        # Glow: wide, faint strokes first, narrowing and brightening towards
        # the main line. glow_scale scales both width and opacity.
        base_glow_widths = [20, 18, 16, 14, 12, 10]
        base_glow_alphas = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3]
        for base_width, base_alpha in zip(base_glow_widths, base_glow_alphas):
            ax.plot(x_values, y_values, lw=base_width * glow_scale, color=glow_color, alpha=min(base_alpha * glow_scale, 1), zorder=2)

        # Plot the main waveform on top
        ax.plot(x_values, y_values, lw=2, color=waveform_color, zorder=3)

        # Remove all axis elements and make the axes fill the whole figure
        ax.set_axis_off()
        ax.set_aspect(aspect='auto')
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        ax.margins(0)
        ax.set_xlim([0, frame_size[0]])
        ax.set_ylim([0, frame_size[1]])
        fig.tight_layout(pad=0)

        # Return the figure as an image with no border or padding
        return mplfig_to_npimage(fig)

    video_clip = VideoClip(make_frame, duration=duration)
    print("Video clip created successfully.")

    return video_clip

def create_mp4_from_wav_with_waveform(wav_file, image_file, output_file, fps=30, frame_size=(1280, 720), waveform_height=100, waveform_color='blue', glow_color='blue', overlay_image=None, bw=True, swap_layers=False):
    """Render ``wav_file`` as an MP4 with an animated waveform.

    Args:
        wav_file: Path to the WAV file used for both the waveform and the
            audio track.
        image_file: Path to the background image or video.
        output_file: Path of the MP4 file to write.
        fps: Frame rate of the output video.
        frame_size: ``(width, height)`` passed to ``create_waveform_clip``.
        waveform_height: Passed through to ``create_waveform_clip``.
        waveform_color: Passed through to ``create_waveform_clip``.
        glow_color: Passed through to ``create_waveform_clip``.
        overlay_image: Passed through to ``create_waveform_clip``.
        bw: Passed through to ``create_waveform_clip``.
        swap_layers: Passed through to ``create_waveform_clip``.
    """
    audio = load_audio(wav_file)
    audio_clip = AudioFileClip(wav_file)

    try:
        # Create the waveform animation synchronized with the audio
        waveform_clip = create_waveform_clip(
            audio,
            image_file,
            fps=fps,
            duration=audio_clip.duration,
            frame_size=frame_size,
            waveform_height=waveform_height,
            waveform_color=waveform_color,
            glow_color=glow_color,
            overlay_image=overlay_image,
            bw=bw,
            swap_layers=swap_layers,
        )

        # Combine the waveform with audio
        final_clip = waveform_clip.set_audio(audio_clip)

        # Write to file with AAC codec for audio
        final_clip.write_videofile(output_file, fps=fps, audio_codec='aac', codec="libx264")
    finally:
        # Release the audio reader even when rendering fails.
        audio_clip.close()

def parse_arguments(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            the arguments are read from ``sys.argv`` as before.

    Returns:
        The populated ``argparse.Namespace``. Note that ``disable_bw`` is
        ``True`` unless ``--disable_bw`` was given, i.e. it holds the
        "convert to black & white" setting, not its negation.
    """
    parser = argparse.ArgumentParser(description='Create an MP4 video with an animated waveform from a WAV file and an image or video background.')

    parser.add_argument('image_file', help='The image or video file to use as the background.')
    parser.add_argument('audio_file', help='The audio WAV file to generate the waveform from.')
    parser.add_argument('output_file', help='The output MP4 file.')

    parser.add_argument('--fps', type=int, default=30, help='Frames per second for the output video (default: 30).')
    parser.add_argument('--frame_size', type=str, default='1280x720', help='Frame size for the video as WIDTHxHEIGHT (default: 1280x720).')
    parser.add_argument('--waveform_height', type=int, default=100, help='Height of the waveform in pixels (default: 100).')
    parser.add_argument('--waveform_color', type=str, default='blue', help='Color of the waveform (default: blue).')
    parser.add_argument('--glow_color', type=str, default='blue', help='Color of the glow around the waveform (default: blue).')
    parser.add_argument('--overlay_image', help='Optional overlay image to blend with the background.')
    parser.add_argument(
        '--blend_mode',
        default='multiply',
        choices=['multiply', 'overlay'],  # Each choice needs a matching <name>_blend function
        help='The blend mode to use (e.g., multiply, overlay). Default is "multiply".',
    )

    # store_false: the value defaults to True and becomes False when the flag
    # is passed, so args.disable_bw is the "apply black & white" switch.
    parser.add_argument(
        '--disable_bw',
        action='store_false',
        help='Disable black & white conversion before applying the overlay.',
    )
    parser.add_argument(
        '--swap_overlay_layers',
        action='store_true',
        help='The still image overlays the video frame by default.',
    )

    return parser.parse_args(argv)

def main():
    """Command-line entry point: parse the options and render the video.

    Installs the selected blend function as the module-level
    ``blend_mode_function`` (read by ``create_waveform_clip``), validates the
    ``--frame_size`` value and exits with status 1 on invalid input.
    """
    global blend_mode_function

    args = parse_arguments()

    # Dynamically resolve the blend mode function
    blend_mode_function_name = f"{args.blend_mode}_blend"
    try:
        blend_mode_function = getattr(sys.modules[__name__], blend_mode_function_name)
    except AttributeError:
        print(f"Error: Blend mode function '{blend_mode_function_name}' not found.", file=sys.stderr)
        sys.exit(1)

    # Parse the frame size from string format
    try:
        width, height = map(int, args.frame_size.lower().split('x'))
        if width <= 0 or height <= 0:
            raise ValueError("frame dimensions must be positive")
    except ValueError:
        print(f"Invalid frame size format: {args.frame_size}. Expected format is WIDTHxHEIGHT.", file=sys.stderr)
        sys.exit(1)
    frame_size = (width, height)

    # args.disable_bw is True unless --disable_bw was passed (store_false).
    # The setting only matters when an overlay is blended.
    bw = args.disable_bw if args.overlay_image is not None else True

    create_mp4_from_wav_with_waveform(
        wav_file=args.audio_file,
        image_file=args.image_file,
        output_file=args.output_file,
        fps=args.fps,
        frame_size=frame_size,
        waveform_height=args.waveform_height,
        waveform_color=args.waveform_color,
        glow_color=args.glow_color,
        overlay_image=args.overlay_image,
        bw=bw,
        swap_layers=args.swap_overlay_layers,
    )

if __name__ == "__main__":
    main()