Skip to content

Commit

Permalink
🧱 Scale preview video to 480p and keep audio file
Browse files Browse the repository at this point in the history
  • Loading branch information
phlmn committed Dec 8, 2023
1 parent 8c05d9d commit 06ba47a
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 11 deletions.
5 changes: 4 additions & 1 deletion frontend/src/editor/player.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor:
const relevantMediaFiles =
data?.media_files.filter((media) => !media.tags.includes('original')) || [];

const mappedFiles = relevantMediaFiles.map((media) => {
const videoFiles = relevantMediaFiles.filter((media) => media.tags.includes('video'));
const audioFiles = relevantMediaFiles.filter((media) => !media.tags.includes('video'));

const mappedFiles = [...videoFiles, ...audioFiles].map((media) => {
return {
src: media.url,
type: media.content_type,
Expand Down
9 changes: 9 additions & 0 deletions worker/transcribee_worker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,22 @@ class Settings(BaseSettings):
"audio_bitrate": "128k",
"ac": "1",
},
"m4a": {
"format": "mp4",
"audio_bitrate": "128k",
"ac": "1",
},
"video:mp4": {
"format": "mp4",
"audio_bitrate": "128k",
"ac": "1",
"c:v": "libx264",
"crf": "26",
"preset": "faster",
# downscale to 480p and pad to multiple of 2 (needed for libx264)
"vf": "scale='min(854,iw)':'min(480,ih)'"
":force_original_aspect_ratio=decrease,"
"pad='iw+mod(iw\\,2)':'ih+mod(ih\\,2)",
},
}

Expand Down
10 changes: 1 addition & 9 deletions worker/transcribee_worker/reencode.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,6 @@ def get_duration(input_path: Path):
return float(ffmpeg.probe(input_path)["format"]["duration"])


def has_video(input_path: Path):
    """Return True if the probed media file contains a video stream.

    Every stream whose ``codec_type`` is ``"video"`` counts.
    """
    probed_streams = ffmpeg.probe(input_path)["streams"]
    return any(entry["codec_type"] == "video" for entry in probed_streams)


async def reencode(
input_path: Path,
output_path: Path,
Expand All @@ -29,7 +21,7 @@ async def reencode(
def work(_):
pipeline = ffmpeg.input(input_path)
streams = [pipeline.audio]
if include_video and has_video(input_path):
if include_video:
streams.append(pipeline.video)

cmd: subprocess.Popen = ffmpeg.output(
Expand Down
27 changes: 26 additions & 1 deletion worker/transcribee_worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Any, AsyncGenerator, Optional, Tuple

import automerge
import ffmpeg
import numpy.typing as npt
from pydantic import parse_raw_as
from transcribee_proto.api import (
Expand Down Expand Up @@ -74,6 +75,19 @@ def get_last_atom_end(doc: EditorDocument):
return 0


def media_has_video(path: Path):
    """Return True if the media file at *path* has a real video stream.

    Video streams flagged as ``attached_pic`` in their disposition are
    ignored: those are album covers embedded in audio files, not video.

    Lookups are tolerant of missing fields: a stream without ``codec_type``
    is treated as non-video, and a stream without a ``disposition`` entry
    (or without ``attached_pic`` in it) is treated as not being a cover.
    """
    streams = ffmpeg.probe(path)["streams"]
    for stream in streams:
        if stream.get("codec_type") != "video":
            continue

        # A missing disposition block means "not an attached picture".
        disposition = stream.get("disposition") or {}
        if disposition.get("attached_pic", 0) != 0:
            # ignore album covers
            continue

        return True

    return False


class Worker:
base_url: str
token: str
Expand Down Expand Up @@ -256,9 +270,17 @@ async def reencode(
self.set_duration(task, duration)

n_profiles = len(settings.REENCODE_PROFILES)

has_video = media_has_video(document_audio)

for i, (profile, parameters) in enumerate(settings.REENCODE_PROFILES.items()):
output_path = self._get_tmpfile(f"reencode_{profile.replace(':', '_')}")

video_profile = profile.startswith("video:")

if video_profile and not has_video:
continue

await reencode(
document_audio,
output_path,
Expand All @@ -269,11 +291,14 @@ async def reencode(
**kwargs,
),
duration,
include_video=(profile.startswith("video:")),
include_video=video_profile,
)

tags = [f"profile:{profile}"] + [f"{k}:{v}" for k, v in parameters.items()]

if video_profile:
tags.append("video")

loop = asyncio.get_running_loop()
await loop.run_in_executor(
None, self.add_document_media_file, task, output_path, tags
Expand Down

0 comments on commit 06ba47a

Please sign in to comment.