Skip to content

Commit

Permalink
Merge pull request #35 from line/model_prediction_test
Browse files Browse the repository at this point in the history
Add test for inference APIs
  • Loading branch information
awkrail authored Sep 18, 2024
2 parents caa7125 + b073107 commit 9062b7a
Show file tree
Hide file tree
Showing 9 changed files with 113 additions and 9 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,22 @@ jobs:
with:
python-version: 3.9

- name: Run ffmpeg
run: |
sudo apt-get update
sudo apt-get upgrade
sudo apt-get install -y ffmpeg
- name: Run dependency libraries
run: |
pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1 torchtext==0.15.1
pip install easydict pandas tqdm pyyaml scikit-learn ffmpeg-python ftfy regex einops fvcore gradio torchlibrosa librosa
pip install 'clip@git+https://github.com/openai/CLIP.git'
pip install 'git+https://github.com/line/lighthouse.git'
pip install pytest
pip uninstall -y numpy
pip install numpy==1.23.5
- name: Run pytest
run: pytest tests/test_models.py
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ gradio_demo/weights/*.ckpt
highlight_*.png
SLOWFAST_8x8_R50.pkl
Cnn14_mAP=0.431.pth
tests/test_videos/video_duration_*
tests/weights/*.ckpt

# Mac
.DS_Store
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ It supports seven models, four features (video and audio features), and six data
We will release v1.0 by the end of September. Our plan includes:
- [x] : Reduce the configuration files (issue #19)
- [ ] : Update the trained weights and feature files on Google Drive and Zenodo
- [ ] : Introduce PyTest for inference API (issue #21)
- [x] : Introduce PyTest for inference API (issue #21)
- [x] : Introduce Linter for inference API (issue #20)

## Installation
Expand Down
7 changes: 6 additions & 1 deletion lighthouse/feature_extractor/vision_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,18 @@ def _select_visual_encoders(self) -> List[Any]:
model_path_dict[self._feature_name])]
return visual_encoders

def _trim_shorter_length(self, visual_features):
    """Cut every feature sequence down to the shortest first-axis length.

    Args:
        visual_features: sequence of objects exposing ``.shape[0]`` and
            supporting slicing along the first axis.

    Returns:
        A new list holding ``x[:shortest]`` for every input ``x``, where
        ``shortest`` is the smallest ``shape[0]`` among the inputs.

    Raises:
        ValueError: if ``visual_features`` is empty (raised by ``min``).
    """
    shortest = min(feature.shape[0] for feature in visual_features)
    trimmed = []
    for feature in visual_features:
        trimmed.append(feature[:shortest])
    return trimmed

def encode(
    self,
    input_path: str) -> Tuple[torch.Tensor, torch.Tensor]:
    """Load frames from ``input_path`` and encode them into one feature tensor.

    Every frame loader is applied to ``input_path``, each result is fed to
    the encoder at the same position, and the per-encoder features are
    concatenated along the last dimension.

    Args:
        input_path: path handed unchanged to every frame loader.

    Returns:
        A tuple ``(concat_features, visual_mask)`` where ``visual_mask`` is
        an all-ones tensor of shape ``(1, len(concat_features))`` moved to
        ``self._device``.

    Raises:
        AssertionError: if the numbers of loaders and encoders differ, or
            if any loader returns ``None``.
    """
    assert len(self._frame_loaders) == len(self._visual_encoders), 'the number of frame_loaders and visual_encoders is different.'
    frame_inputs = [loader(input_path) for loader in self._frame_loaders]
    assert not any(item is None for item in frame_inputs), 'one of the loaders return None object.'
    visual_features = [encoder(frames) for encoder, frames in zip(self._visual_encoders, frame_inputs)]
    # Encoders may yield a different number of steps for the same video, so
    # the features are trimmed to the shortest length before concatenation;
    # concatenating the untrimmed tensors would raise on a size mismatch.
    concat_features = torch.concat(self._trim_shorter_length(visual_features), dim=-1)
    visual_mask = torch.ones(1, len(concat_features)).to(self._device)
    return concat_features, visual_mask
4 changes: 2 additions & 2 deletions lighthouse/frame_loaders/slowfast_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,11 @@ def _pad_frames(self, tensor, value=0):
if n == self._target_fps:
return tensor
if self._padding_mode == "constant":
z = torch.ones(n, tensor.shape[1], tensor.shape[2], tensor.shape[3], dtype=torch.uint8)
z = torch.ones(int(n), tensor.shape[1], tensor.shape[2], tensor.shape[3], dtype=torch.uint8)
z *= value
return torch.cat((tensor, z), 0)
elif self._padding_mode == "tile":
z = torch.cat(n * [tensor[-1:, :, :, :]])
z = torch.cat(int(n) * [tensor[-1:, :, :, :]])
return torch.cat((tensor, z), 0)
else:
raise NotImplementedError(
Expand Down
6 changes: 3 additions & 3 deletions lighthouse/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,10 @@ def _post_processing(
pred_spans = torch.clamp(span_cxw_to_xx(pred_spans) * video_duration, min=0, max=video_duration)
cur_ranked_preds = torch.cat([pred_spans, scores[:, None]], dim=1).tolist()
cur_ranked_preds = sorted(cur_ranked_preds, key=lambda x: x[2], reverse=True)
cur_ranked_preds = [[float(f"{e:.4f}") for e in row] for row in cur_ranked_preds][:self._moment_num]
cur_ranked_preds = [[float(f"{e:.4f}") for e in row] for row in cur_ranked_preds]
saliency_scores = outputs["saliency_scores"][inputs["src_vid_mask"] == 1].cpu().tolist()

return cur_ranked_preds, saliency_scores
return cur_ranked_preds[:self._moment_num], saliency_scores

def _encode_audio(
self,
Expand Down
91 changes: 89 additions & 2 deletions tests/test_models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,90 @@
import os
import math
import pytest
import subprocess
from lighthouse.models import (MomentDETRPredictor, QDDETRPredictor, EaTRPredictor,
CGDETRPredictor, UVCOMPredictor)

def test():
    """Trivial test that always returns True."""
    return True

# Feature, model and dataset names; the tests combine them as
# '{feature}_{model}_{dataset}.ckpt' to build checkpoint file names.
FEATURES = ['clip', 'clip_slowfast']
MODELS = ['moment_detr', 'qd_detr', 'eatr', 'cg_detr', 'uvcom']
DATASETS = ['qvhighlight']
# Video durations in seconds. The tests iterate range(MIN_DURATION, MAX_DURATION),
# so MAX_DURATION is exclusive (the longest generated clip is 150 seconds).
MIN_DURATION = 10
MAX_DURATION = 151
# Number of predicted moments the prediction test expects per query.
MOMENT_NUM = 10


@pytest.mark.dependency()
def test_generate_multiple_duration_videos():
    """Cut the example video into one clip per tested duration with ffmpeg.

    For every duration in ``range(MIN_DURATION, MAX_DURATION)`` the first
    ``duration`` seconds of the example video are stream-copied to
    ``tests/test_videos/video_duration_{duration}.mp4``. All conversions
    are attempted first; afterwards every return code must be 0.
    """
    return_codes = []
    for duration in range(MIN_DURATION, MAX_DURATION):
        # Pass an argument list instead of a shell string so that no shell
        # parsing or quoting is involved.
        cmd = [
            'ffmpeg', '-y',
            '-i', 'api_example/RoripwjYFp8_60.0_210.0.mp4',
            '-t', str(duration),
            '-c', 'copy',
            f'tests/test_videos/video_duration_{duration}.mp4',
        ]
        result = subprocess.run(cmd)
        return_codes.append(result.returncode)
    for return_code in return_codes:
        assert return_code == 0, '[ffmpeg conversion] return_code should be set 0.'

@pytest.mark.dependency()
def test_save_model_weights():
    """Download every checkpoint that is not yet present in ``tests/weights/``.

    One checkpoint named ``{feature}_{model}_{dataset}.ckpt`` is expected
    for each combination of FEATURES, MODELS and DATASETS. Files that
    already exist are not downloaded again. All downloads are attempted
    first; afterwards every ``wget`` return code must be 0.
    """
    return_codes = []
    for feature in FEATURES:
        for model in MODELS:
            for dataset in DATASETS:
                weight_name = f'{feature}_{model}_{dataset}.ckpt'
                if os.path.exists(f'tests/weights/{weight_name}'):
                    continue
                # Argument list instead of a shell string: no shell parsing.
                cmd = [
                    'wget', '-P', 'tests/weights/',
                    f'https://zenodo.org/records/13363606/files/{weight_name}',
                ]
                result = subprocess.run(cmd)
                return_codes.append(result.returncode)
    for return_code in return_codes:
        assert return_code == 0, '[save model weights] return_code should be set 0.'

@pytest.mark.dependency()
def test_load_slowfast_pann_weights():
    """Download the SlowFast and PANNs weights into ``tests/`` when missing.

    Each file is fetched with ``wget`` only if it does not exist yet, and
    the return code of each download must be 0.
    """
    if not os.path.exists('tests/SLOWFAST_8x8_R50.pkl'):
        # Argument list instead of a shell string: no shell parsing.
        result = subprocess.run([
            'wget', '-P', 'tests/',
            'https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWFAST_8x8_R50.pkl',
        ])
        assert result.returncode == 0, '[Save slowfast weights] return_code should be set 0.'

    if not os.path.exists('tests/Cnn14_mAP=0.431.pth'):
        result = subprocess.run([
            'wget', '-P', 'tests/',
            'https://zenodo.org/record/3987831/files/Cnn14_mAP%3D0.431.pth',
        ])
        assert result.returncode == 0, '[Save PANNs weights] return_code should be set 0.'

@pytest.mark.dependency(depends=['test_generate_multiple_duration_videos',
                                 'test_save_model_weights',
                                 'test_load_slowfast_pann_weights'])
def test_model_prediction():
    """
    Run every tested checkpoint on every generated clip and check output sizes.

    For each feature/model/dataset combination a predictor is built on CPU,
    then each clip from MIN_DURATION to MAX_DURATION - 1 seconds is encoded
    and queried. The test asserts that MOMENT_NUM moments are returned and
    that the number of saliency scores equals ceil(duration / clip length).

    Untested features:
      - ResNet+GloVe: skipped because of its low performance.
      - CLIP+Slowfast+PANNs: skipped for latency reasons (presumably inference
        is too slow for this test; confirm).
    Untested models:
      - TR-DETR: skipped because the model calls .cuda(), which must be removed first.
      - Taskweave: skipped because two strategies are necessary for prediction.
    """
    predictor_classes = {
        'moment_detr': MomentDETRPredictor,
        'qd_detr': QDDETRPredictor,
        'eatr': EaTRPredictor,
        'cg_detr': CGDETRPredictor,
        'uvcom': UVCOMPredictor,
    }
    query = 'A woman wearing a glass is speaking in front of the camera'
    combinations = [(feature, model_name, dataset)
                    for feature in FEATURES
                    for model_name in MODELS
                    for dataset in DATASETS]

    for feature, model_name, dataset in combinations:
        model_weight = os.path.join('tests/weights/', f'{feature}_{model_name}_{dataset}.ckpt')
        predictor_cls = predictor_classes[model_name]
        model = predictor_cls(model_weight, device='cpu', feature_name=feature,
                              slowfast_path='tests/SLOWFAST_8x8_R50.pkl',
                              pann_path='tests/Cnn14_mAP=0.431.pth')

        # one clip per duration, from MIN_DURATION up to MAX_DURATION - 1 seconds
        for second in range(MIN_DURATION, MAX_DURATION):
            video_path = f'tests/test_videos/video_duration_{second}.mp4'
            model.encode_video(video_path)
            prediction = model.predict(query)
            assert len(prediction['pred_relevant_windows']) == MOMENT_NUM, \
                f'The number of moments from {feature}_{model_name}_{dataset} is expected {MOMENT_NUM}, but got {len(prediction["pred_relevant_windows"])}.'
            assert len(prediction['pred_saliency_scores']) == math.ceil(second / model._clip_len), \
                f'The number of saliency scores from {feature}_{model_name}_{dataset} is expected {math.ceil(second / model._clip_len)}, but got {len(prediction["pred_saliency_scores"])}.'
Empty file added tests/test_videos/.gitkeep
Empty file.
Empty file added tests/weights/.gitkeep
Empty file.

0 comments on commit 9062b7a

Please sign in to comment.