From 742259adf024cd2eb7ea40bf733a92f174a1227f Mon Sep 17 00:00:00 2001 From: awkrail Date: Sat, 14 Sep 2024 20:02:54 +0900 Subject: [PATCH 01/16] added test --- .github/workflows/pytest.yml | 6 ++++++ tests/test_models.py | 17 ++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index fa1b1fe..152b3ab 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -19,6 +19,12 @@ jobs: with: python-version: 3.9 + - name: Run ffmpeg + run: | + apt update + apt upgrade + apt install -y ffmpeg + - name: Run dependency libraries run: | pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1 torchtext==0.15.1 diff --git a/tests/test_models.py b/tests/test_models.py index 190a13c..47eaeee 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,3 +1,18 @@ +import subprocess -def test(): +def generate_multiple_duration_videos(): + MIN_DURATION = 10 + MAX_DURATION = 151 + durations = [i for i in range(MIN_DURATION, MAX_DURATION)] + for duration in durations: + cmd = 'ffmpeg -i api_example/RoripwjYFp8_60.0_210.0.mp4 -t {} -c copy tests/test_videos/video_duration_{}.mp4'.format(duration, duration) + subprocess.run(cmd, shell=True) + return True + + +def test_model_prediction(): + features = ['resnet_glove', 'clip', 'clip_slowfast', 'clip_slowfast_pann'] + models = ['moment_detr', 'qd_detr', 'eatr', 'cg_detr', 'uvcom', 'tr_detr', 'taskweave_mr2hd', 'taskweave_hd2mr'] + datasets = ['qvhighlight'] + # TODO: test all of the models on all of the settings. return True \ No newline at end of file From b39f9986a9cd6056184aab9f7013f9af0b5ab2ef Mon Sep 17 00:00:00 2001 From: awkrail Date: Sat, 14 Sep 2024 23:29:35 +0900 Subject: [PATCH 02/16] added sudo --- .github/workflows/pytest.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 152b3ab..33cb6c5 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -21,9 +21,9 @@ jobs: - name: Run ffmpeg run: | - apt update - apt upgrade - apt install -y ffmpeg + sudo apt-get update + sudo apt-get upgrade + sudo apt-get install -y ffmpeg - name: Run dependency libraries run: | From 524570a9df10520fa514f8430fa801f11afff9b1 Mon Sep 17 00:00:00 2001 From: awkrail Date: Sat, 14 Sep 2024 23:41:32 +0900 Subject: [PATCH 03/16] return code --- tests/test_models.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 47eaeee..334d00e 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,18 +1,26 @@ +import os import subprocess def generate_multiple_duration_videos(): MIN_DURATION = 10 MAX_DURATION = 151 durations = [i for i in range(MIN_DURATION, MAX_DURATION)] + return_codes = [] for duration in durations: cmd = 'ffmpeg -i api_example/RoripwjYFp8_60.0_210.0.mp4 -t {} -c copy tests/test_videos/video_duration_{}.mp4'.format(duration, duration) - subprocess.run(cmd, shell=True) - return True - + result = subprocess.run(cmd, shell=True) + return_codes.append(result.returncode) + + print(os.listdir('tests/test_videos')) + for return_code in return_codes: + assert return_code == 0, 'return_code should be set 0.' def test_model_prediction(): features = ['resnet_glove', 'clip', 'clip_slowfast', 'clip_slowfast_pann'] models = ['moment_detr', 'qd_detr', 'eatr', 'cg_detr', 'uvcom', 'tr_detr', 'taskweave_mr2hd', 'taskweave_hd2mr'] datasets = ['qvhighlight'] # TODO: test all of the models on all of the settings. + + + return True \ No newline at end of file From 94a64d8bedca9747079ed0e78689e6cf7f84b46e Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 12:04:12 +0900 Subject: [PATCH 04/16] added test_models.py --- .gitignore | 2 + tests/test_models.py | 76 ++++++++++++++++++++++++++++++++------ tests/test_videos/.gitkeep | 0 tests/weights/.gitkeep | 0 4 files changed, 67 insertions(+), 11 deletions(-) create mode 100644 tests/test_videos/.gitkeep create mode 100644 tests/weights/.gitkeep diff --git a/.gitignore b/.gitignore index fe9a6f7..733ed7d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ gradio_demo/weights/*.ckpt highlight_*.png SLOWFAST_8x8_R50.pkl Cnn14_mAP=0.431.pth +tests/test_videos/video_duration_* +tests/weights/*.ckpt # Mac .DS_Store diff --git a/tests/test_models.py b/tests/test_models.py index 334d00e..db210b5 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,26 +1,80 @@ import os +import pytest import subprocess +from lighthouse.models import (MomentDETRPredictor, QDDETRPredictor, EaTRPredictor, + CGDETRPredictor, TRDETRPredictor, UVCOMPredictor) -def generate_multiple_duration_videos(): +FEATURES = ['clip', 'clip_slowfast'] +MODELS = ['moment_detr', 'qd_detr', 'eatr', 'cg_detr', 'uvcom', 'tr_detr'] +DATASETS = ['qvhighlight'] + +@pytest.mark.dependency() +def test_generate_multiple_duration_videos(): MIN_DURATION = 10 MAX_DURATION = 151 durations = [i for i in range(MIN_DURATION, MAX_DURATION)] return_codes = [] for duration in durations: - cmd = 'ffmpeg -i api_example/RoripwjYFp8_60.0_210.0.mp4 -t {} -c copy tests/test_videos/video_duration_{}.mp4'.format(duration, duration) + cmd = f'ffmpeg -y -i api_example/RoripwjYFp8_60.0_210.0.mp4 -t {duration} -c copy tests/test_videos/video_duration_{duration}.mp4' result = subprocess.run(cmd, shell=True) return_codes.append(result.returncode) - - print(os.listdir('tests/test_videos')) for return_code in return_codes: - assert return_code == 0, 'return_code should be set 0.' + assert return_code == 0, '[ffmpeg conversion] return_code should be set 0.' -def test_model_prediction(): - features = ['resnet_glove', 'clip', 'clip_slowfast', 'clip_slowfast_pann'] - models = ['moment_detr', 'qd_detr', 'eatr', 'cg_detr', 'uvcom', 'tr_detr', 'taskweave_mr2hd', 'taskweave_hd2mr'] - datasets = ['qvhighlight'] - # TODO: test all of the models on all of the settings. +@pytest.mark.dependency() +def test_save_model_weights(): + return_codes = [] + for feature in FEATURES: + for model in MODELS: + for dataset in DATASETS: + if not os.path.exists(f'tests/weights/{feature}_{model}_{dataset}.ckpt'): + cmd = f'wget -P tests/weights/ https://zenodo.org/records/13363606/files/{feature}_{model}_{dataset}.ckpt' + result = subprocess.run(cmd, shell=True) + return_codes.append(result.returncode) + for return_code in return_codes: + assert return_code == 0, '[save model weights] return_code should be set 0.' + +@pytest.mark.dependency() +def test_load_slowfast_pann_weights(): + result = subprocess.run('wget -P tests/ https://dl.fbaipublicfiles.com/pyslowfast/' + 'model_zoo/kinetics400/SLOWFAST_8x8_R50.pkl', shell=True) + assert result == 0, '[Save slowfast weights] return_code should be set 0.' + result = subprocess.run('wget -P tests/ https://zenodo.org/record/3987831/files/' + 'Cnn14_mAP%3D0.431.pth', shell=True) + assert result == 0, '[Save PANNs weights] return_code should be set 0.' +@pytest.mark.dependency(depends=['test_generate_multiple_duration_videos', + 'test_save_model_weights', + 'test_load_slowfast_pann_weights']) +def test_model_prediction(): + """ + Test all of the trained models, except for resnet_glove features and taskweave + ResNet+GloVe is skipped due to their low performance. + CLIP+Slowfast+PANNs is skipped due to their low latency. + Taskweave is skiiped because two strategies are neccesary for prediction. + """ + model_loaders = { + 'moment_detr': MomentDETRPredictor, + 'qd_detr': QDDETRPredictor, + 'eatr': EaTRPredictor, + 'cg_detr': CGDETRPredictor, + 'tr_detr': TRDETRPredictor, + 'uvcom': UVCOMPredictor, + } + for feature in FEATURES: + for model in MODELS: + for dataset in DATASETS: + model_weight = os.path.join('tests/weights/', f'{feature}_{model}_{dataset}.ckpt') + model = model_loaders[model](model_weight, device='cpu', feature_name=feature, + slowfast_path='tests/SLOWFAST_8x8_R50.pkl', + pann_path='tests/Cnn14_mAP=0.431.pth') + + # test model on 10s to 150s + for video_path in os.listdir('tests/test_videos/'): + video_path = os.path.join('tests/test_videos/', video_path) + model.encode_video(video_path) - return True \ No newline at end of file + query = 'A woman wearing a glass is speaking in front of the camera' + prediction = model.predict(query) + import ipdb; ipdb.set_trace() \ No newline at end of file diff --git a/tests/test_videos/.gitkeep b/tests/test_videos/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/weights/.gitkeep b/tests/weights/.gitkeep new file mode 100644 index 0000000..e69de29 From 058368570d49e8e63d37dd915e8424d32e526a7e Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 12:12:13 +0900 Subject: [PATCH 05/16] added lighthouse --- .github/workflows/pytest.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 33cb6c5..20cddce 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -30,7 +30,9 @@ jobs: pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1 torchtext==0.15.1 pip install easydict pandas tqdm pyyaml scikit-learn ffmpeg-python ftfy regex einops fvcore gradio torchlibrosa librosa pip install 'clip@git+https://github.com/openai/CLIP.git' + pip install 'git+https://github.com/line/lighthouse.git' pip install pytest + - name: Run pytest run: pytest tests/test_models.py \ No newline at end of file From 636801fb270f4febe0ab40660265073f251e5a05 Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 13:25:55 +0900 Subject: [PATCH 06/16] fix returncode --- .github/workflows/pytest.yml | 2 +- tests/test_models.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 20cddce..cdf0e3f 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -35,4 +35,4 @@ jobs: - name: Run pytest - run: pytest tests/test_models.py \ No newline at end of file + run: pytest -s tests/test_models.py \ No newline at end of file diff --git a/tests/test_models.py b/tests/test_models.py index db210b5..e33b048 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -36,12 +36,13 @@ def test_save_model_weights(): @pytest.mark.dependency() def test_load_slowfast_pann_weights(): - result = subprocess.run('wget -P tests/ https://dl.fbaipublicfiles.com/pyslowfast/' - 'model_zoo/kinetics400/SLOWFAST_8x8_R50.pkl', shell=True) - assert result == 0, '[Save slowfast weights] return_code should be set 0.' - result = subprocess.run('wget -P tests/ https://zenodo.org/record/3987831/files/' - 'Cnn14_mAP%3D0.431.pth', shell=True) - assert result == 0, '[Save PANNs weights] return_code should be set 0.' + if not os.path.exists('tests/SLOWFAST_8x8_R50.pkl'): + result = subprocess.run('wget -P tests/ https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWFAST_8x8_R50.pkl', shell=True) + assert result.returncode == 0, '[Save slowfast weights] return_code should be set 0.' + + if not os.path.exists('tests/Cnn14_mAP=0.431.pth'): + result = subprocess.run('wget -P tests/ https://zenodo.org/record/3987831/files/Cnn14_mAP%3D0.431.pth', shell=True) + assert result.returncode == 0, '[Save PANNs weights] return_code should be set 0.' @pytest.mark.dependency(depends=['test_generate_multiple_duration_videos', 'test_save_model_weights', From cb7add3beea75b56519e70ebb4b027145b8e2ba7 Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 13:32:43 +0900 Subject: [PATCH 07/16] rm -s --- .github/workflows/pytest.yml | 2 +- tests/test_models.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index cdf0e3f..20cddce 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -35,4 +35,4 @@ jobs: - name: Run pytest - run: pytest -s tests/test_models.py \ No newline at end of file + run: pytest tests/test_models.py \ No newline at end of file diff --git a/tests/test_models.py b/tests/test_models.py index e33b048..01332aa 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -77,5 +77,4 @@ def test_model_prediction(): model.encode_video(video_path) query = 'A woman wearing a glass is speaking in front of the camera' - prediction = model.predict(query) - import ipdb; ipdb.set_trace() \ No newline at end of file + prediction = model.predict(query) \ No newline at end of file From 2815b17bfffb4e018444c4cf3b1f540ea3146660 Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 13:42:26 +0900 Subject: [PATCH 08/16] update numpy --- .github/workflows/pytest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 20cddce..24126cd 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -32,6 +32,7 @@ jobs: pip install 'clip@git+https://github.com/openai/CLIP.git' pip install 'git+https://github.com/line/lighthouse.git' pip install pytest + pip install numpy --upgrade - name: Run pytest From daf5de3cae514e90bff20c745bdf0c181957c631 Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 14:45:31 +0900 Subject: [PATCH 09/16] fixing test_model --- tests/test_models.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 01332aa..2b07d02 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,4 +1,5 @@ import os +import math import pytest import subprocess from lighthouse.models import (MomentDETRPredictor, QDDETRPredictor, EaTRPredictor, @@ -7,11 +8,13 @@ FEATURES = ['clip', 'clip_slowfast'] MODELS = ['moment_detr', 'qd_detr', 'eatr', 'cg_detr', 'uvcom', 'tr_detr'] DATASETS = ['qvhighlight'] +MIN_DURATION = 10 +MAX_DURATION = 151 +MOMENT_NUM = 10 + @pytest.mark.dependency() def test_generate_multiple_duration_videos(): - MIN_DURATION = 10 - MAX_DURATION = 151 durations = [i for i in range(MIN_DURATION, MAX_DURATION)] return_codes = [] for duration in durations: @@ -64,17 +67,24 @@ def test_model_prediction(): } for feature in FEATURES: - for model in MODELS: + for model_name in MODELS: for dataset in DATASETS: - model_weight = os.path.join('tests/weights/', f'{feature}_{model}_{dataset}.ckpt') - model = model_loaders[model](model_weight, device='cpu', feature_name=feature, - slowfast_path='tests/SLOWFAST_8x8_R50.pkl', - pann_path='tests/Cnn14_mAP=0.431.pth') + model_weight = os.path.join('tests/weights/', f'{feature}_{model_name}_{dataset}.ckpt') + model = model_loaders[model_name](model_weight, device='cpu', feature_name=feature, + slowfast_path='tests/SLOWFAST_8x8_R50.pkl', + pann_path='tests/Cnn14_mAP=0.431.pth') # test model on 10s to 150s - for video_path in os.listdir('tests/test_videos/'): - video_path = os.path.join('tests/test_videos/', video_path) + for second in range(MIN_DURATION, MAX_DURATION): + video_path = f'tests/test_videos/video_duration_{second}.mp4' model.encode_video(video_path) query = 'A woman wearing a glass is speaking in front of the camera' - prediction = model.predict(query) \ No newline at end of file + prediction = model.predict(query) + try: + assert len(prediction['pred_relevant_windows']) == MOMENT_NUM, \ + f'The number of moments from {feature}_{model_name}_{dataset} is expected {MOMENT_NUM}, but got {len(prediction["pred_relevant_windows"])}.' + assert len(prediction['pred_saliency_scores']) == math.ceil(second / model._clip_len), \ + f'The number of saliency scores from {feature}_{model_name}_{dataset} is expected {math.ceil(second / model._clip_len)}, but got {len(prediction["pred_saliency_scores"])}.' + except: + import ipdb; ipdb.set_trace() \ No newline at end of file From 7bda000e02e98dd18bc27faebce9f70f04b38b38 Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 15:25:31 +0900 Subject: [PATCH 10/16] implemented assertion for all model settings --- tests/test_models.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 2b07d02..eef6fc3 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -81,10 +81,7 @@ def test_model_prediction(): query = 'A woman wearing a glass is speaking in front of the camera' prediction = model.predict(query) - try: - assert len(prediction['pred_relevant_windows']) == MOMENT_NUM, \ - f'The number of moments from {feature}_{model_name}_{dataset} is expected {MOMENT_NUM}, but got {len(prediction["pred_relevant_windows"])}.' - assert len(prediction['pred_saliency_scores']) == math.ceil(second / model._clip_len), \ - f'The number of saliency scores from {feature}_{model_name}_{dataset} is expected {math.ceil(second / model._clip_len)}, but got {len(prediction["pred_saliency_scores"])}.' - except: - import ipdb; ipdb.set_trace() \ No newline at end of file + assert len(prediction['pred_relevant_windows']) == MOMENT_NUM, \ + f'The number of moments from {feature}_{model_name}_{dataset} is expected {MOMENT_NUM}, but got {len(prediction["pred_relevant_windows"])}.' + assert len(prediction['pred_saliency_scores']) == math.ceil(second / model._clip_len), \ + f'The number of saliency scores from {feature}_{model_name}_{dataset} is expected {math.ceil(second / model._clip_len)}, but got {len(prediction["pred_saliency_scores"])}.' \ No newline at end of file From b31a6f5a915ca35d5c5b705a991be3578835183c Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 15:34:44 +0900 Subject: [PATCH 11/16] fix mypy --- lighthouse/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lighthouse/models.py b/lighthouse/models.py index 7a1def6..56cf637 100644 --- a/lighthouse/models.py +++ b/lighthouse/models.py @@ -198,10 +198,10 @@ def _post_processing( pred_spans = torch.clamp(span_cxw_to_xx(pred_spans) * video_duration, min=0, max=video_duration) cur_ranked_preds = torch.cat([pred_spans, scores[:, None]], dim=1).tolist() cur_ranked_preds = sorted(cur_ranked_preds, key=lambda x: x[2], reverse=True) - cur_ranked_preds = [[float(f"{e:.4f}") for e in row] for row in cur_ranked_preds][:self._moment_num] + cur_ranked_preds = [[float(f"{e:.4f}") for e in row] for row in cur_ranked_preds] saliency_scores = outputs["saliency_scores"][inputs["src_vid_mask"] == 1].cpu().tolist() - - return cur_ranked_preds, saliency_scores + + return cur_ranked_preds[:self._moment_num], saliency_scores def _encode_audio( self, From a8959ecae1c00639f9a4592e1cfd24abedac487f Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 15:44:30 +0900 Subject: [PATCH 12/16] fix numpy issue / specify numpy version --- .github/workflows/pytest.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 24126cd..fc6e25c 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -32,7 +32,8 @@ jobs: pip install 'clip@git+https://github.com/openai/CLIP.git' pip install 'git+https://github.com/line/lighthouse.git' pip install pytest - pip install numpy --upgrade + pip uninstall numpy + pip install numpy==1.23.5 - name: Run pytest From 8c699fd00c23b015d7e81a363e2f58eb354ff650 Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 15:49:47 +0900 Subject: [PATCH 13/16] added yes --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index fc6e25c..38a2970 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -32,7 +32,7 @@ jobs: pip install 'clip@git+https://github.com/openai/CLIP.git' pip install 'git+https://github.com/line/lighthouse.git' pip install pytest - pip uninstall numpy + pip uninstall -y numpy pip install numpy==1.23.5 From aa87601c8b647a385debbc5f4e4366c10aaf785b Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 16:33:24 +0900 Subject: [PATCH 14/16] rm tr_detr coz it use .cuda() function --- tests/test_models.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index eef6fc3..f592ec2 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -6,7 +6,7 @@ CGDETRPredictor, TRDETRPredictor, UVCOMPredictor) FEATURES = ['clip', 'clip_slowfast'] -MODELS = ['moment_detr', 'qd_detr', 'eatr', 'cg_detr', 'uvcom', 'tr_detr'] +MODELS = ['moment_detr', 'qd_detr', 'eatr', 'cg_detr', 'uvcom'] DATASETS = ['qvhighlight'] MIN_DURATION = 10 MAX_DURATION = 151 @@ -53,16 +53,19 @@ def test_load_slowfast_pann_weights(): def test_model_prediction(): """ Test all of the trained models, except for resnet_glove features and taskweave - ResNet+GloVe is skipped due to their low performance. - CLIP+Slowfast+PANNs is skipped due to their low latency. - Taskweave is skiiped because two strategies are neccesary for prediction. + Untested features: + - ResNet+GloVe is skipped due to their low performance. + - CLIP+Slowfast+PANNs is skipped due to their low latency. + + Untested models: + - TR-DETR is skipped because model use .cuda() function. We need to remove it. + - Taskweave is skiped because two strategies are neccesary for prediction. """ model_loaders = { 'moment_detr': MomentDETRPredictor, 'qd_detr': QDDETRPredictor, 'eatr': EaTRPredictor, 'cg_detr': CGDETRPredictor, - 'tr_detr': TRDETRPredictor, 'uvcom': UVCOMPredictor, } From c6366ac170e61a43add6cf00f54846c81e4d68e3 Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 18:56:15 +0900 Subject: [PATCH 15/16] fixed a bug for clip+slowfast --- lighthouse/feature_extractor/vision_encoder.py | 7 ++++++- lighthouse/frame_loaders/slowfast_loader.py | 4 ++-- tests/test_models.py | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/lighthouse/feature_extractor/vision_encoder.py b/lighthouse/feature_extractor/vision_encoder.py index 2215b58..1dde4fc 100644 --- a/lighthouse/feature_extractor/vision_encoder.py +++ b/lighthouse/feature_extractor/vision_encoder.py @@ -87,6 +87,11 @@ def _select_visual_encoders(self) -> List[Any]: model_path_dict[self._feature_name])] return visual_encoders + def _trim_shorter_length(self, visual_features): + min_length = min([x.shape[0] for x in visual_features]) + trimmed_visual_features = [x[:min_length] for x in visual_features] + return trimmed_visual_features + def encode( self, input_path: str) -> Tuple[torch.Tensor, torch.Tensor]: @@ -94,6 +99,6 @@ def encode( frame_inputs = [loader(input_path) for loader in self._frame_loaders] assert not any([item is None for item in frame_inputs]), 'one of the loaders return None object.' visual_features = [encoder(frames) for encoder, frames in zip(self._visual_encoders, frame_inputs)] - concat_features = torch.concat(visual_features, dim=-1) + concat_features = torch.concat(self._trim_shorter_length(visual_features), dim=-1) visual_mask = torch.ones(1, len(concat_features)).to(self._device) return concat_features, visual_mask \ No newline at end of file diff --git a/lighthouse/frame_loaders/slowfast_loader.py b/lighthouse/frame_loaders/slowfast_loader.py index c01da6b..8ae55b3 100644 --- a/lighthouse/frame_loaders/slowfast_loader.py +++ b/lighthouse/frame_loaders/slowfast_loader.py @@ -102,11 +102,11 @@ def _pad_frames(self, tensor, value=0): if n == self._target_fps: return tensor if self._padding_mode == "constant": - z = torch.ones(n, tensor.shape[1], tensor.shape[2], tensor.shape[3], dtype=torch.uint8) + z = torch.ones(int(n), tensor.shape[1], tensor.shape[2], tensor.shape[3], dtype=torch.uint8) z *= value return torch.cat((tensor, z), 0) elif self._padding_mode == "tile": - z = torch.cat(n * [tensor[-1:, :, :, :]]) + z = torch.cat(int(n) * [tensor[-1:, :, :, :]]) return torch.cat((tensor, z), 0) else: raise NotImplementedError( diff --git a/tests/test_models.py b/tests/test_models.py index f592ec2..796a05f 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -3,7 +3,8 @@ import pytest import subprocess from lighthouse.models import (MomentDETRPredictor, QDDETRPredictor, EaTRPredictor, - CGDETRPredictor, TRDETRPredictor, UVCOMPredictor) + CGDETRPredictor, UVCOMPredictor) + FEATURES = ['clip', 'clip_slowfast'] MODELS = ['moment_detr', 'qd_detr', 'eatr', 'cg_detr', 'uvcom'] @@ -81,7 +82,6 @@ def test_model_prediction(): for second in range(MIN_DURATION, MAX_DURATION): video_path = f'tests/test_videos/video_duration_{second}.mp4' model.encode_video(video_path) - query = 'A woman wearing a glass is speaking in front of the camera' prediction = model.predict(query) assert len(prediction['pred_relevant_windows']) == MOMENT_NUM, \ From b073107089488d1072976700f8c9dead0d5853b6 Mon Sep 17 00:00:00 2001 From: awkrail Date: Wed, 18 Sep 2024 18:56:40 +0900 Subject: [PATCH 16/16] checked pytest --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 377efba..f7f931d 100755 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ It supports seven models, four features (video and audio features), and six data We will release v1.0 until the end of September. Our plan includes: - [x] : Reduce the configuration files (issue #19) - [ ] : Update the trained weights and feature files on Google Drive and Zenodo -- [ ] : Introduce PyTest for inference API (issue #21) +- [x] : Introduce PyTest for inference API (issue #21) - [x] : Introduce Linter for inference API (issue #20) ## Installation