[emapp] fix several encoding video related bugs (#63)

* [emapp] fixed a bug where a partial audio source was not applied when encoding video * [emapp] prevent setting the end of encoding beyond the project's last frame index * [emapp] fixed a bug where plugin_lsmash could not estimate the file size properly * [emapp] fix warnings and remove debug print * [plugin] fixed an incorrect output sample count * [emapp] clang-format * [plugin] make audio track first
hkrn · Oct 1, 2021 · 7c54542 · 7c54542
1 parent eff5880
commit 7c54542
Show file tree

Hide file tree

Showing 7 changed files with 87 additions and 58 deletions.
diff --git a/docs/change_log.rst b/docs/change_log.rst
@@ -27,6 +27,12 @@
 * Windows 版において言語設定を行うとメニューアイテムの状態が不正なものになる
 * ビューポートのジャギーを減らすようにした
 * 「:ref:`A843137E-D975-47B7-86F1-E018BF189873`」パネルの「補間」が「補完」になっている
+* 動画出力において音源ありで開始を 0 より大きく設定して開始した場合音ズレが起きる
+
+  * 出力の開始箇所にかかわらず音源の最初からはじまる不具合があった
+
+* 動画出力において書き出しの終端がプロジェクトの終端フレームより大きく設定できてしまう問題
+* プロジェクトの再生 FPS の設定により ``plugin_lsmash`` の出力見積りが実際より少なく表示されることがある
 
 34.0.0 (2021/9/17)
 ******************************************

diff --git a/docs/trouble_shooting.rst b/docs/trouble_shooting.rst
@@ -68,7 +68,7 @@ QuickTime/VLC/IINA いずれも読めない場合はそもそも動画として
 
 .. important::
    31.5.0 以降 nmm 形式においてプロジェクトファイル読み込み時に部分的にファイルが読み込まれなかった場合は何が読み込まれなかったのかをダイアログ表示するようにしました。
-   
+
    25.0.0 以降では音源または動画のファイルが見つからない場合はエラーではなく単純に読み込みを無視するようになりました。
    そのため、音源または動画が読み込まれない場合は元のファイルが何らかの理由で見つからないことが原因の可能性があります。
 
@@ -189,6 +189,16 @@ IK の仕様です。特にモーションの作成元モデルと読み込み
 32.0 以上の場合でも場合によってはアンチエイリアスを有効にした状態で落ちることがあります。
 その場合は別途アンチエイリアスエフェクトを利用する形でアンチエイリアスを無効にして書き出してください。
 
+動画出力で音ズレが起きる
+=============================================================================
+
+34.1 未満の場合、以下の条件を満たすと途中からの書き出しにもかかわらず音源の最初からはじまる不具合がありました。
+
+* 音源を読み込ませている
+* 動画書き出しの設定で開始フレームを０より大きく設定している
+
+34.1 以降で修正していますが、34.1 未満の場合は開始フレームを０に設定して動画を書き出して動画編集で調整してください。
+
 動画出力で書き出すと意図しない（例えば真っ黒）動画が出力される
 =============================================================================
 

diff --git a/emapp/include/emapp/internal/CapturingPassState.h b/emapp/include/emapp/internal/CapturingPassState.h
@@ -105,8 +105,8 @@ class CapturingPassState : private NonCopyable {
     sg_pass outputPass() const;
     sg_buffer frameStagingBuffer() const;
     nanoem_frame_index_t startFrameIndex() const;
-    nanoem_frame_index_t lastPTS() const;
-    void setLastPTS(nanoem_frame_index_t value);
+    nanoem_frame_index_t lastVideoPTS() const;
+    void setLastVideoPTS(nanoem_frame_index_t value);
     bool hasSaveState() const;
 
 private:
@@ -127,7 +127,7 @@ class CapturingPassState : private NonCopyable {
     nanoem_u32_t m_lastPreferredMotionFPS;
     nanoem_u32_t m_lastSampleLevel;
     nanoem_u32_t m_sampleLevel;
-    nanoem_frame_index_t m_lastPTS;
+    nanoem_frame_index_t m_lastVideoPTS;
     nanoem_frame_index_t m_startFrameIndex;
     volatile int m_asyncCount;
     int m_blittedCount;
@@ -184,12 +184,13 @@ class CapturingPassAsVideoState NANOEM_DECL_SEALED : public CapturingPassState {
         nanoem_f32_t deltaScaleFactor, nanoem_f32_t &amount, nanoem_frame_index_t &frameIndex);
 
     nanoem_frame_index_t duration() const NANOEM_DECL_NOEXCEPT;
-    void handleCaptureViaVideoRecorder(Project *project, nanoem_frame_index_t frameIndex,
-        nanoem_frame_index_t videoFrameIndex, nanoem_frame_index_t durationFrameIndices, nanoem_f32_t deltaScaleFactor);
-    void handleCaptureViaEncoderPlugin(Project *project, nanoem_frame_index_t frameIndex,
-        nanoem_frame_index_t videoFrameIndex, nanoem_frame_index_t durationFrameIndices, nanoem_f32_t deltaScaleFactor,
+    void handleCaptureViaVideoRecorder(Project *project, nanoem_frame_index_t frameIndex, nanoem_frame_index_t audioPTS,
+        nanoem_frame_index_t videoPTS, nanoem_frame_index_t durationFrameIndices, nanoem_f32_t deltaScaleFactor);
+    void handleCaptureViaEncoderPlugin(Project *project, nanoem_frame_index_t frameIndex, nanoem_frame_index_t audioPTS,
+        nanoem_frame_index_t videoPTS, nanoem_frame_index_t durationFrameIndices, nanoem_f32_t deltaScaleFactor,
         Error &error);
-    bool encodeVideoFrame(const ByteArray &frameData, nanoem_frame_index_t pts, Error &error);
+    bool encodeVideoFrame(
+        const ByteArray &frameData, nanoem_frame_index_t audioPTS, nanoem_frame_index_t videoPTS, Error &error);
     void seekAndProgress(Project *project, nanoem_frame_index_t frameIndex, nanoem_frame_index_t durationFrameIndices);
     void finishEncoding();
     void stopEncoding(Error &error);

diff --git a/emapp/plugins/ffmpeg/ffmpeg.cc b/emapp/plugins/ffmpeg/ffmpeg.cc
@@ -52,6 +52,8 @@ struct FFmpegEncoder {
     static const char kAudioCodecComponentID[];
     static const char kVideoCodecComponentID[];
     static const char kVideoPixelFormatComponentID[];
+    static const int kMinimumSampleRate;
+    static const int kMinimumNumChannels;
 
     FFmpegEncoder()
         : m_formatContext(nullptr)
@@ -89,8 +91,8 @@ struct FFmpegEncoder {
         if ((m_audioStream = avformat_new_stream(m_formatContext, codec)) != nullptr) {
             m_audioCodecContext = avcodec_alloc_context3(codec);
             m_audioCodecContext->sample_fmt = AV_SAMPLE_FMT_S16;
-            m_audioCodecContext->sample_rate = std::max(m_numFrequency, 44100);
-            m_audioCodecContext->channels = std::max(m_numChannels, 2);
+            m_audioCodecContext->sample_rate = std::max(m_numFrequency, kMinimumSampleRate);
+            m_audioCodecContext->channels = std::max(m_numChannels, kMinimumNumChannels);
             m_audioCodecContext->channel_layout = av_get_default_channel_layout(m_audioCodecContext->channels);
             m_audioCodecContext->time_base.num = 1;
             m_audioCodecContext->time_base.den = m_audioCodecContext->sample_rate;
@@ -243,7 +245,8 @@ struct FFmpegEncoder {
         nanoem_application_plugin_status_t *status)
     {
         int inputSampleCount = size / (m_numChannels * (m_numBits / 8)),
-            outputSampleCount = av_rescale_rnd(inputSampleCount, 44100, m_numFrequency, AV_ROUND_UP);
+            outputSampleCount =
+                av_rescale_rnd(inputSampleCount, m_audioCodecContext->sample_rate, m_numFrequency, AV_ROUND_UP);
         ScopedAudioFrame output(m_audioStream->codecpar, outputSampleCount, m_nextAudioPTS);
         if (!wrapCall(av_frame_get_buffer(output, 0), status) || !wrapCall(av_frame_make_writable(output), status)) {
             return;
@@ -561,6 +564,8 @@ struct FFmpegEncoder {
 const char FFmpegEncoder::kAudioCodecComponentID[] = "ffmpeg.audio-codec";
 const char FFmpegEncoder::kVideoCodecComponentID[] = "ffmpeg.video-codec";
 const char FFmpegEncoder::kVideoPixelFormatComponentID[] = "ffmpeg.video-pixel-format";
+const int FFmpegEncoder::kMinimumSampleRate = 44100;
+const int FFmpegEncoder::kMinimumNumChannels = 2;
 
 struct FFmpegDecoder {
     FFmpegDecoder()

diff --git a/emapp/plugins/lsmash/lsmash.cc b/emapp/plugins/lsmash/lsmash.cc
@@ -268,25 +268,23 @@ struct LSmashEncoder {
                     crop, m_videoSummary->width, m_videoSummary->height, &m_videoSummary->clap);
                 m_root = lsmash_create_root();
                 m_file = lsmash_set_file(m_root, &m_fileParameters);
-                m_videoMediaParameters.timescale = m_movieParameters.timescale;
-                nanoem_u32_t videoTrackID = lsmash_create_track(m_root, ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK);
-                nanoem_u32_t videoSummaryIndex = lsmash_add_sample_entry(m_root, videoTrackID, m_videoSummary);
-                lsmash_set_movie_parameters(m_root, &m_movieParameters);
-                lsmash_set_track_parameters(m_root, videoTrackID, &m_videoTrackParameters);
-                lsmash_set_media_parameters(m_root, videoTrackID, &m_videoMediaParameters);
+                nanoem_u32_t audioTrackID = 0, audioSummaryIndex = 0;
                 if (m_audioSummary->frequency > 0 && m_audioSummary->channels > 0 && m_audioSummary->sample_size > 0) {
                     m_audioSummary->samples_in_frame = m_audioSummary->frequency / m_movieParameters.timescale;
                     m_audioMediaParameters.timescale = m_audioSummary->frequency;
-                    nanoem_u32_t audioTrackID = lsmash_create_track(m_root, ISOM_MEDIA_HANDLER_TYPE_AUDIO_TRACK);
-                    nanoem_u32_t audioSummaryIndex = lsmash_add_sample_entry(m_root, audioTrackID, m_audioSummary);
+                    audioTrackID = lsmash_create_track(m_root, ISOM_MEDIA_HANDLER_TYPE_AUDIO_TRACK);
+                    audioSummaryIndex = lsmash_add_sample_entry(m_root, audioTrackID, m_audioSummary);
                     lsmash_set_track_parameters(m_root, audioTrackID, &m_audioTrackParameters);
                     lsmash_set_media_parameters(m_root, audioTrackID, &m_audioMediaParameters);
-                    m_worker = new LSmashEncodeWorker(
-                        m_root, audioTrackID, videoTrackID, audioSummaryIndex, videoSummaryIndex);
-                }
-                else {
-                    m_worker = new LSmashEncodeWorker(m_root, 0, videoTrackID, 0, videoSummaryIndex);
                 }
+                m_videoMediaParameters.timescale = m_movieParameters.timescale;
+                const nanoem_u32_t videoTrackID = lsmash_create_track(m_root, ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK),
+                                   videoSummaryIndex = lsmash_add_sample_entry(m_root, videoTrackID, m_videoSummary);
+                lsmash_set_movie_parameters(m_root, &m_movieParameters);
+                lsmash_set_track_parameters(m_root, videoTrackID, &m_videoTrackParameters);
+                lsmash_set_media_parameters(m_root, videoTrackID, &m_videoMediaParameters);
+                m_worker =
+                    new LSmashEncodeWorker(m_root, audioTrackID, videoTrackID, audioSummaryIndex, videoSummaryIndex);
             }
         }
         handleStatusCode(result, status);