From c9e7052e2a2432b375d895f510e82a37e8ad029a Mon Sep 17 00:00:00 2001 From: winlin Date: Mon, 22 Jul 2024 20:08:40 +0800 Subject: [PATCH] Dubbing: Fix bug of changing window. v5.15.20 --- DEVELOPER.md | 14 ++++++++++++-- platform/dubbing.go | 37 ++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/DEVELOPER.md b/DEVELOPER.md index af8bb439..a75c4cab 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -937,6 +937,16 @@ youtube-dl --proxy socks5://127.0.0.1:10000 --output srs 'https://youtu.be/Sqraz > Note: Setup the `--output TEMPLATE` when wants to define the filename. +## Regenerate ASR for Dubbing + +Create a `regenerate.txt` under the project directory, then restart Oryx and refresh the page: + +```bash +touch ./platform/containers/data/dubbing/4830675a-7945-48fe-bed9-72e6fa904a19/regenerate.txt +``` + +Oryx will regenerate the ASR and translation, then delete the `regenerate.txt` to make sure it executes only once. + ## WebRTC Candidate Oryx follows the rules for WebRTC candidate, see [CANDIDATE](https://ossrs.io/lts/en-us/docs/v5/doc/webrtc#config-candidate), @@ -1272,8 +1282,8 @@ The following are the update records for the Oryx server. * Dubbing: Merge more words if in small duration. v5.15.17 * Dubbing: Allow fullscreen when ASR. v5.15.18 * Dubbing: Support disable asr or translation. v5.15.19 - * Dubbing: Fix bug when changing ASR segment size. [v5.15.20](https://github.com/ossrs/oryx/releases/tag/v5.15.20) - * Dubbing: Refine the window of text. v5.15.21 + * Dubbing: Fix bug when changing ASR segment size. v5.15.20 + * Dubbing: Refine the window of text. [v5.15.20](https://github.com/ossrs/oryx/releases/tag/v5.15.20) * v5.14: * Merge features and bugfix from releases. v5.14.1 * Dubbing: Support VoD dubbing for multiple languages. 
[v5.14.2](https://github.com/ossrs/oryx/releases/tag/v5.14.2) diff --git a/platform/dubbing.go b/platform/dubbing.go index dcdbdd30..83e00fdd 100644 --- a/platform/dubbing.go +++ b/platform/dubbing.go @@ -1550,27 +1550,14 @@ func (v *AudioResponse) QueryGroup(uuid string) *AudioGroup { return nil } -func (v *AudioResponse) FindAnySegmentMatchStarttime(starttime float64) *AudioSegment { - for i, g := range v.Groups { - var nextGroup *AudioGroup - if i < len(v.Groups)-1 { - nextGroup = v.Groups[i+1] - } - - firstSegment, lastSegment := g.FirstSegment(), g.LastSegment() - if nextGroup != nil { - lastSegment = nextGroup.LastSegment() - } - - if firstSegment == nil || lastSegment == nil { - continue - } - - if firstSegment.OriginalStart <= starttime && starttime <= lastSegment.OriginalStart { - return firstSegment +func (v *AudioResponse) FindAnySegmentMatchStarttime(starttime float64) []*AudioGroup { + var matched []*AudioGroup + for _, g := range v.Groups { + if first := g.FirstSegment(); first != nil && starttime < first.Start { + matched = append(matched, g) } } - return nil + return matched } func (v *AudioResponse) AppendSegment(resp openai.AudioResponse, starttime float64) { @@ -1798,7 +1785,14 @@ func (v *SrsDubbingTask) Start(ctx context.Context) error { // Whether force to generate ASR response. if alwaysForceRegenerateASRResponse { - v.AsrResponse = NewAudioResponse() + v.AsrResponse = nil + } + + // Whether a command file exists to regenerate ASR. + regenerateASR := path.Join(conf.Pwd, aiDubbingWorkDir, v.project.UUID, "regenerate.txt") + if _, err := os.Stat(regenerateASR); err == nil { + os.Remove(regenerateASR) + v.AsrResponse = nil } if v.AsrResponse == nil { @@ -1815,7 +1809,8 @@ func (v *SrsDubbingTask) Start(ctx context.Context) error { break } - if v.AsrResponse.FindAnySegmentMatchStarttime(starttime) != nil { + // If some groups match the starttime, do not generate the ASR for it. 
+ if matches := v.AsrResponse.FindAnySegmentMatchStarttime(starttime); len(matches) > 10 { continue }