forked from ocean-data-factory-sweden/kso-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
t3_utils.py
1177 lines (976 loc) · 38.6 KB
/
t3_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
# base imports
import os
import shutil
import ffmpeg as ffmpeg_python
import pandas as pd
import numpy as np
import math
import datetime
import subprocess
import logging
import random
import threading
from multiprocessing.pool import ThreadPool as Pool
# widget imports
from tqdm import tqdm
from IPython.display import display, clear_output
from ipywidgets import interactive, Layout
import ipywidgets as widgets
from panoptes_client import (
SubjectSet,
Subject,
)
# util imports
import kso_utils.db_utils as db_utils
import kso_utils.project_utils as project_utils
# Logging
# Configure the root logger at import time and let INFO-level messages through,
# so the logging.info calls in this module are emitted.
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)
############################################################
######## Create some clip examples #########################
############################################################
def check_movie_uploaded(movie_i: str, db_info_dict: dict):
    """
    Report whether clips of a movie have already been uploaded to Zooniverse.

    The result is logged and also returned.

    :param movie_i: the name of the movie you want to check
    :type movie_i: str
    :param db_info_dict: a dictionary whose "db_path" entry is handed to
        db_utils.create_connection
    :type db_info_dict: dict
    :return: True if the filename of any clip subject in the database is
        contained in movie_i, False otherwise
    :rtype: bool
    """
    # Create connection to db
    conn = db_utils.create_connection(db_info_dict["db_path"])

    # Query info about the clip subjects uploaded to Zooniverse
    subjects_df = pd.read_sql_query(
        "SELECT id, subject_type, filename, clip_start_time,"
        "clip_end_time, movie_id FROM subjects WHERE subject_type='clip'",
        conn,
    )

    # Save the video filenames of the clips uploaded to Zooniverse
    videos_uploaded = subjects_df.filename.dropna().unique()

    # Check if selected movie has already been uploaded
    # (plain substring test: an uploaded filename must occur inside movie_i)
    already_uploaded = any(mv in movie_i for mv in videos_uploaded)

    if already_uploaded:
        logging.info(f"{movie_i} has clips already uploaded.")
    else:
        logging.info(f"{movie_i} has not been uploaded to Zooniverse yet")

    return already_uploaded
def select_clip_length():
    """
    Build the dropdown used to pick the length of the clips.

    :return: an ipywidgets Dropdown offering 10 and 5, with 10 preselected
    """
    # Settings of the widget that records the length of the clips
    dropdown_settings = {
        "options": [10, 5],
        "value": 10,
        "description": "Length of clips:",
        "style": {"description_width": "initial"},
        "ensure_option": True,
        "disabled": False,
    }
    return widgets.Dropdown(**dropdown_settings)
def select_random_clips(movie_i: str, db_info_dict: dict):
    """
    Display widgets to choose how many random example clips to take from a
    movie and how long they should be.

    :param movie_i: the name of the movie of interest
    :type movie_i: str
    :param db_info_dict: a dictionary whose "db_path" entry is handed to
        db_utils.create_connection
    :type db_info_dict: dict
    :return: the ipywidgets `interactive` object; its `result` is a dictionary
        with the starting points of the clips ("clip_start_time") and the
        length of the clips ("random_clip_length")
    """
    # Create connection to db
    conn = db_utils.create_connection(db_info_dict["db_path"])

    # Query info about the movie of interest.
    # The filename is bound as a parameter so quotes in it cannot break the SQL.
    # NOTE(review): the "?" placeholder assumes a sqlite3-style connection -
    # confirm against db_utils.create_connection.
    movie_df = pd.read_sql_query(
        "SELECT filename, duration, sampling_start, sampling_end "
        "FROM movies WHERE filename=?",
        conn,
        params=(movie_i,),
    )

    # Select n number of clips at random
    def n_random_clips(clip_length, n_clips):
        # Candidate starting points, one every clip_length from 0 up to the
        # (floored) duration of the movie
        duration_movie = math.floor(movie_df["duration"].values[0])
        candidate_starts = range(0, duration_movie, clip_length)

        # random.sample raises ValueError when asked for more items than exist
        if n_clips > len(candidate_starts):
            logging.warning(
                f"Only {len(candidate_starts)} clips of length {clip_length} "
                f"fit in {movie_i}; {n_clips} were requested."
            )
            n_clips = len(candidate_starts)

        starting_clips = random.sample(candidate_starts, n_clips)

        # Save the outputs in a dictionary
        random_clips_info = {
            # The starting points of the clips
            "clip_start_time": starting_clips,
            # The length of the clips
            "random_clip_length": clip_length,
        }

        logging.info(
            f"The initial seconds of the examples will be: {random_clips_info['clip_start_time']}"
        )

        return random_clips_info

    # Select the number of clips to upload
    clip_length_number = interactive(
        n_random_clips,
        clip_length=select_clip_length(),
        n_clips=widgets.IntSlider(
            value=3,
            min=1,
            max=5,
            step=1,
            description="Number of random clips:",
            disabled=False,
            layout=Layout(width="40%"),
            style={"description_width": "initial"},
        ),
    )

    display(clip_length_number)
    return clip_length_number
# Function to extract the videos
def extract_example_clips(
    output_clip_path: str, start_time_i: int, clip_length: int, movie_path: str
):
    """
    Extract a clip from a movie file with ffmpeg and save it to a new file.

    Nothing is done when output_clip_path already exists.

    :param output_clip_path: The path to the output clip
    :param start_time_i: The start time of the clip in seconds
    :param clip_length: The length of the clip in seconds
    :param movie_path: the path to the movie file
    """
    # Skip clips that were already extracted
    if os.path.exists(output_clip_path):
        return

    # Extract the clip
    return_code = subprocess.call(
        [
            "ffmpeg",
            "-ss",
            str(start_time_i),
            "-t",
            str(clip_length),
            "-i",
            str(movie_path),
            "-c",
            "copy",
            "-an",  # removes the audio
            "-force_key_frames",
            "1",
            str(output_clip_path),
        ]
    )

    # Surface ffmpeg failures instead of silently ignoring the exit status
    if return_code != 0:
        logging.error(
            f"ffmpeg exited with code {return_code} while extracting {output_clip_path}"
        )

    # Make the clip accessible to everyone (raises if ffmpeg produced no file)
    os.chmod(output_clip_path, 0o777)
def create_example_clips(
    movie_i: str,
    movie_path: str,
    db_info_dict: dict,
    project: project_utils.Project,
    clip_selection,
    pool_size=4,
):
    """
    Extract example clips from a movie, based on the start times and length
    held in clip_selection.

    :param movie_i: str, the name of the movie
    :type movie_i: str
    :param movie_path: the path to the movie
    :type movie_path: str
    :param db_info_dict: a dictionary containing the information of the
        database (not used by this function)
    :type db_info_dict: dict
    :param project: the project object; its `server` attribute decides where
        the clips folder is created
    :param clip_selection: an object whose `result` is a dictionary with the
        keys "clip_start_time" (list of start seconds) and
        "random_clip_length"
    :param pool_size: The number of parallel workers to run, defaults to 4 (optional)
    :return: The list of paths of the clips
    """
    # Specify the starting seconds and length of the example clips
    clips_start_time = clip_selection.result["clip_start_time"]
    clip_length = clip_selection.result["random_clip_length"]

    # Get project-specific server info
    server = project.server

    # Specify the temp folder to host the clips
    output_clip_folder = movie_i + "_clips"
    if server == "SNIC":
        clips_folder = os.path.join(
            "/cephyr/NOBACKUP/groups/snic2021-6-9/tmp_dir/", output_clip_folder
        )
    else:
        clips_folder = output_clip_folder

    # Create the folder to store the videos if not exist
    # (makedirs also creates missing parent folders)
    if not os.path.exists(clips_folder):
        os.makedirs(clips_folder)
        os.chmod(clips_folder, 0o777)

    # Specify the number of parallel items
    pool = Pool(pool_size)

    # Keep track of the new clips and of the pending extraction of each one
    example_clips = []
    pending_extractions = []

    # Create the information for each clip and extract it
    for start_time_i in clips_start_time:
        # Create the filename and path of the clip
        output_clip_name = (
            movie_i + "_clip_" + str(start_time_i) + "_" + str(clip_length) + ".mp4"
        )
        output_clip_path = os.path.join(clips_folder, output_clip_name)

        # Add the path of the clip to the list
        example_clips.append(output_clip_path)

        # Extract the clips and store them in the folder
        async_result = pool.apply_async(
            extract_example_clips,
            (
                output_clip_path,
                start_time_i,
                clip_length,
                movie_path,
            ),
        )
        pending_extractions.append((output_clip_path, async_result))

    pool.close()
    pool.join()

    # Exceptions raised inside the workers only surface through get();
    # report them instead of claiming success unconditionally.
    failed_clips = 0
    for clip_path, async_result in pending_extractions:
        try:
            async_result.get()
        except Exception as error:
            failed_clips += 1
            logging.error(f"Clip {clip_path} could not be extracted: {error}")

    if failed_clips == 0:
        logging.info("Clips extracted successfully")

    return example_clips
def check_clip_size(clips_list: list):
    """
    Build a dataframe with the file path and size (in MB) of each clip and log
    whether the clips are small enough to be uploaded to Zooniverse.
    If the size is too large, we suggest compressing them as a first step.

    :param clips_list: list of file paths to the clips you want to check
    :type clips_list: list
    :return: A dataframe with the columns "File_path" and "Size" (one row per
        clip), or None when clips_list is None
    """
    if clips_list is None:
        logging.error("No clips found.")
        return None

    # Get list of files with size (bytes converted to MB, 1 << 20 bytes each)
    files_with_size = [
        (file_path, os.path.getsize(file_path) / float(1 << 20))
        for file_path in clips_list
    ]

    df = pd.DataFrame(files_with_size, columns=["File_path", "Size"])

    if df.empty:
        # An empty list must not be reported as "ready to be uploaded"
        logging.error("No clips found.")
    elif df["Size"].ge(8).any():
        logging.info(
            "Clips are too large (over 8 MB) to be uploaded to Zooniverse. Compress them!"
        )
    else:
        logging.info(
            "Clips are a good size (below 8 MB). Ready to be uploaded to Zooniverse"
        )

    return df
class clip_modification_widget(widgets.VBox):
    """
    Vertical box holding a counter and, below it, one modification dropdown
    per unit of the counter.
    """

    def __init__(self):
        """
        Create a widget that allows the user to select which modifications to run
        """
        # Counter that drives how many dropdowns are shown
        self.widget_count = widgets.IntText(
            description="Number of modifications:",
            display="flex",
            flex_flow="column",
            align_items="stretch",
            style={"description_width": "initial"},
        )

        # Container for the modification dropdowns
        holder_layout = widgets.Layout(
            width="100%", display="inline-flex", flex_flow="row wrap"
        )
        self.bool_widget_holder = widgets.HBox(layout=holder_layout)

        box_children = [self.widget_count, self.bool_widget_holder]

        # Rebuild the dropdowns every time the counter changes
        self.widget_count.observe(self._add_bool_widgets, names=["value"])
        super().__init__(children=box_children)

    def _add_bool_widgets(self, widg):
        """
        Replace the dropdowns in the holder with as many fresh ones as the new
        value of the counter.

        :param widg: the change notification; its "new" entry is the new count
        """
        dropdowns = []
        for position in range(widg["new"]):
            dropdown = select_modification()
            # Number the description so every dropdown has its own label
            dropdown.description = dropdown.description + f" #{position}"
            dropdowns.append(dropdown)
        self.bool_widget_holder.children = tuple(dropdowns)

    @property
    def checks(self):
        """
        :return: a dictionary mapping the description of every dropdown in the
            holder to its current value
        """
        selected = {}
        for dropdown in self.bool_widget_holder.children:
            selected[dropdown.description] = dropdown.value
        return selected
def select_modification():
    """
    Build the dropdown used to choose a clip modification.

    Every option maps a label to a dictionary holding either a "filter" entry
    (a chain of ffmpeg-python calls stored as text) or the "crf"/"bv"
    compression settings.

    :return: A widget that allows the user to select a clip modification.
    """
    # borrowed from https://www.element84.com/blog/color-correction-in-space-and-at-sea
    color_correction_filter = (
        ".filter('curves', '0/0 0.396/0.67 1/1', "
        "'0/0 0.525/0.451 1/1', "
        "'0/0 0.459/0.517 1/1')"
    )

    # Black boxes drawn over the top and the bottom of the frame
    blur_filter = (
        ".drawbox(0, 0, 'iw', 'ih*(15/100)', color='black' "
        ",thickness='fill').drawbox(0, 'ih*(95/100)', "
        "'iw', 'ih*(15/100)', color='black', thickness='fill')"
    )

    # Modifications available, in the order they are listed in the dropdown
    clip_modifications = {
        "Color_correction": {"filter": color_correction_filter},
        "Zoo_low_compression": {"crf": "25", "bv": "7"},
        "Zoo_medium_compression": {"crf": "27", "bv": "6"},
        "Zoo_high_compression": {"crf": "30", "bv": "5"},
        # NOTE(review): the "None" entry sits inside "Blur_sensitive_info", so
        # it is not offered as an option of its own - confirm whether it was
        # meant to be a top-level choice.
        "Blur_sensitive_info": {"filter": blur_filter, "None": {}},
    }

    # Widget to select the clip modification
    return widgets.Dropdown(
        options=list(clip_modifications.items()),
        description="Select modification:",
        ensure_option=True,
        disabled=False,
        style={"description_width": "initial"},
    )
def modify_clips(
    clip_i: str, modification_details: dict, output_clip_path: str, gpu_available: bool
):
    """
    Modify a clip with ffmpeg according to the modification details provided
    and save the result to output_clip_path.

    :param clip_i: the path to the clip to be modified
    :param modification_details: a dictionary of the modifications to be made
        to the clip; every value is itself a dictionary holding either a
        "filter" entry or "crf"/"bv" settings
    :param output_clip_path: The path to the output clip
    :param gpu_available: If you have a GPU, set this to True. If you don't, set it to False
    :raises ffmpeg_python.Error: when ffmpeg fails on the non-GPU path
    """
    if gpu_available:
        # Unnest the modification detail dict
        df = pd.json_normalize(modification_details, sep="_")
        # NOTE(review): only the "bv" setting is used here; a modification
        # without a "bv" entry (e.g. a filter) makes this lookup raise
        # IndexError - confirm the intended GPU behaviour for filters.
        b_v = df.filter(regex="bv$", axis=1).values[0][0] + "M"

        subprocess.call(
            [
                "ffmpeg",
                "-hwaccel",
                "cuda",
                "-hwaccel_output_format",
                "cuda",
                "-i",
                clip_i,
                "-c:a",
                "copy",
                "-c:v",
                "h264_nvenc",
                "-b:v",
                b_v,
                output_clip_path,
            ]
        )

    else:
        # The ffmpeg-python call chain is assembled as text because the
        # filters are stored as text. Paths are embedded through repr() so
        # quotes or backslashes in them cannot break the expression.
        output_target = repr(str(output_clip_path))
        init_prompt = f"ffmpeg_python.input({str(clip_i)!r})"
        default_output_prompt = f".output({output_target}, crf=20, pix_fmt='yuv420p', vcodec='libx264')"
        mod_prompt = ""
        # Initialised so that filter-only modifications fall back to the
        # default output (this name used to be unbound in that case)
        out_prompt = ""

        # Set up modification
        for transform in modification_details.values():
            if "filter" in transform:
                mod_prompt += transform["filter"]
            else:
                # Unnest the modification detail dict
                df = pd.json_normalize(modification_details, sep="_")
                crf = df.filter(regex="crf$", axis=1).values[0][0]
                out_prompt = f".output({output_target}, crf={crf}, preset='veryfast', pix_fmt='yuv420p', vcodec='libx264')"

        full_prompt = init_prompt + mod_prompt + (out_prompt or default_output_prompt)

        # Run the modification
        try:
            # SECURITY NOTE: eval executes the assembled text as Python code;
            # modification_details must only come from trusted sources.
            eval(full_prompt).run(capture_stdout=True, capture_stderr=True)
            os.chmod(output_clip_path, 0o777)
        except ffmpeg_python.Error as e:
            logging.info("stdout: %s", e.stdout.decode("utf8"))
            logging.info("stderr: %s", e.stderr.decode("utf8"))
            raise

    logging.info(f"Clip {clip_i} modified successfully")
def create_modified_clips(
    clips_list: list,
    movie_i: str,
    modification_details: dict,
    project: project_utils.Project,
    gpu_available: bool,
    pool_size: int = 4,
):
    """
    Apply the selected modifications to a list of clips and store the results
    in a "modified_<movie_i>_clips" folder, which is emptied first.

    :param clips_list: a list of the paths to the clips you want to modify
    :param movie_i: the name of the movie, used to name the output folder
    :param modification_details: a dictionary with the modifications to be applied to the clips. The
        keys are the names of the modifications and the values are the parameters of the modifications
    :param project: the project object; its `server` attribute decides where
        the output folder is created
    :param gpu_available: True if you have a GPU available, False if you don't
    :param pool_size: the number of parallel workers to run, defaults to 4 (optional)
    :return: The list of paths of the modified clips, or None when no
        modification was selected
    """
    # Get project-specific server info
    server = project.server

    # Specify the folder to host the modified clips
    mod_clip_folder = "modified_" + movie_i + "_clips"
    if server == "SNIC":
        mod_clips_folder = os.path.join(
            "/cephyr/NOBACKUP/groups/snic2021-6-9/tmp_dir", mod_clip_folder
        )
    else:
        mod_clips_folder = mod_clip_folder

    # Remove existing modified clips
    if os.path.exists(mod_clips_folder):
        shutil.rmtree(mod_clips_folder)

    if not modification_details:
        logging.info("No modification selected")
        return None

    # Create the folder to store the videos (it was removed above if it existed)
    os.makedirs(mod_clips_folder)
    os.chmod(str(mod_clips_folder), 0o777)

    # Specify the number of parallel items
    pool = Pool(pool_size)

    # Keep track of the new clips and of the pending modification of each one
    modified_clips = []
    results = []

    # Create the information for each clip and modify it
    for clip_i in clips_list:
        # Create the filename and path of the modified clip
        output_clip_name = "modified_" + os.path.basename(clip_i)
        output_clip_path = os.path.join(mod_clips_folder, output_clip_name)

        # Add the path of the clip to the list
        modified_clips.append(output_clip_path)

        # Modify the clips and store them in the folder
        async_result = pool.apply_async(
            modify_clips,
            (
                clip_i,
                modification_details,
                output_clip_path,
                gpu_available,
            ),
        )
        results.append((clip_i, async_result))

    pool.close()
    pool.join()

    # Exceptions raised inside the workers only surface through get();
    # report them instead of dropping them silently.
    for clip_i, async_result in results:
        try:
            async_result.get()
        except Exception as error:
            logging.error(f"Clip {clip_i} could not be modified: {error}")

    return modified_clips
# Display the clips side-by-side
def view_clips(example_clips: list, modified_clip_path: str):
    """
    Build a widget that shows an original clip and its modified version
    side-by-side.

    :param example_clips: a list of paths to the original clips
    :param modified_clip_path: The path to the modified clip you want to view
    :return: A widget that displays the original and modified videos side-by-side.
    :raises ValueError: when no original clip matches the modified clip
    """
    # Get the name of the original clip: the modified clips are named
    # "modified_" + original name, so only that leading prefix is removed.
    prefix = "modified_"
    example_clip_name = os.path.basename(modified_clip_path)
    if example_clip_name.startswith(prefix):
        example_clip_name = example_clip_name[len(prefix):]

    # Get the path of the original clip
    example_clip_path = next(
        (
            clip_path
            for clip_path in example_clips
            if os.path.basename(clip_path) == example_clip_name
        ),
        None,
    )
    if example_clip_path is None:
        raise ValueError(
            f"No original clip named {example_clip_name} was found for {modified_clip_path}"
        )

    # Get the extension of the video
    # NOTE(review): the extension keeps its leading dot (e.g. ".mp4") -
    # confirm that this is what widgets.Video expects as format.
    extension = os.path.splitext(example_clip_path)[1]

    # Open original video (the file is closed as soon as it has been read)
    with open(example_clip_path, "rb") as original_file:
        vid1 = original_file.read()
    wi1 = widgets.Video(value=vid1, format=extension, width=400, height=500)

    # Open modified video
    with open(modified_clip_path, "rb") as modified_file:
        vid2 = modified_file.read()
    wi2 = widgets.Video(value=vid2, format=extension, width=400, height=500)

    # Display videos side-by-side
    return widgets.HBox([wi1, wi2])
def compare_clips(example_clips: list, modified_clips: list):
    """
    Display a dropdown listing the modified clips; picking one shows the
    original and the modified clip side by side.

    :param example_clips: The original clips
    :param modified_clips: The list of clips that you want to compare to the original clips
    """
    # Extra "no movie" entry, added to prevent conflicts
    no_movie_label = "0 No movie"
    dropdown_options = tuple(np.append(modified_clips, no_movie_label))

    clip_path_widget = widgets.Dropdown(
        options=dropdown_options,
        description="Select original clip:",
        ensure_option=True,
        disabled=False,
        layout=Layout(width="50%"),
        style={"description_width": "initial"},
    )

    main_out = widgets.Output()
    display(clip_path_widget, main_out)

    def on_change(change):
        """Refresh the output area with the clips of the new selection."""
        with main_out:
            clear_output()
            selection = change["new"]
            if selection != no_movie_label:
                # Show the original and modified clips
                display(view_clips(example_clips, selection))
            else:
                logging.info("It is OK to modify the clips again")

    clip_path_widget.observe(on_change, names="value")
############################################################
######## Create the clips to upload to Zooniverse ##########
############################################################
def select_clip_n_len(movie_i: str, db_info_dict: dict):
    """
    Display widgets to choose the length of the clips and the range of the
    movie (in seconds) they are taken from.

    :param movie_i: the name of the movie you want to upload
    :param db_info_dict: a dictionary whose "db_path" entry is handed to
        db_utils.create_connection
    :return: the ipywidgets `interactive` object; its `result` is the number
        of clips to upload
    :raises ValueError: when the movie is not found in the movies table
    """
    # Create connection to db
    conn = db_utils.create_connection(db_info_dict["db_path"])

    # Query info about the movie of interest.
    # The filename is bound as a parameter so quotes in it cannot break the SQL.
    # NOTE(review): the "?" placeholder assumes a sqlite3-style connection -
    # confirm against db_utils.create_connection.
    movie_df = pd.read_sql_query(
        "SELECT filename, duration, sampling_start, sampling_end "
        "FROM movies WHERE filename=?",
        conn,
        params=(movie_i,),
    )

    if movie_df.empty:
        raise ValueError(f"The movie {movie_i} was not found in the movies table.")

    # The slider needs plain integers, not one-element arrays
    movie_row = movie_df.iloc[0]
    sampling_start = int(movie_row["sampling_start"])
    sampling_end = int(movie_row["sampling_end"])
    duration = int(movie_row["duration"])

    # Work out how many clips fit in the selected range
    def to_clips(clip_length, clips_range):
        # Calculate the number of clips
        clips = int((clips_range[1] - clips_range[0]) / clip_length)

        logging.info(f"Number of clips to upload: {clips}")

        return clips

    # Select the number of clips to upload
    clip_length_number = interactive(
        to_clips,
        clip_length=select_clip_length(),
        clips_range=widgets.IntRangeSlider(
            value=[sampling_start, sampling_end],
            min=0,
            max=duration,
            step=1,
            description="Range in seconds:",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="90%"),
        ),
    )

    display(clip_length_number)
    return clip_length_number
def review_clip_selection(clip_selection, movie_i: str, clip_modification):
    """
    Log a summary of the clips that will be created from the movie selected.

    :param clip_selection: the object that contains the results of the clip
        selection (its `kwargs["clips_range"]` and `result` are read)
    :param movie_i: the movie you want to create clips from
    :param clip_modification: The modification that will be applied to the clips
    """
    trim_bounds = clip_selection.kwargs["clips_range"]
    first_second = trim_bounds[0]
    last_second = trim_bounds[1]

    # How many clips will be created
    clips_line = (
        f"You are about to create {round(clip_selection.result)} clips from {movie_i}"
    )
    logging.info(clips_line)

    # Where the range starts and ends, shown as time spans
    span_line = f"starting at {datetime.timedelta(seconds=first_second)} and ending at {datetime.timedelta(seconds=last_second)}"
    logging.info(span_line)

    logging.info(f"The modification selected is {clip_modification}")
# Func to expand seconds
def expand_list(df: pd.DataFrame, list_column: str, new_column: str):
    """
    Turn a dataframe with a column of lists into a dataframe with one row per
    list item.

    :param df: the dataframe you want to expand
    :param list_column: the column that contains the list
    :param new_column: the name of the new column that will hold the items
    :return: A dataframe without list_column, where every original row is
        repeated once per item of its list, the items are stored in
        new_column and the index is renumbered from 0.
    """
    # Number of items held by each row
    list_lengths = df[list_column].apply(len)

    # Row position i appears once per item of row i
    repeated_rows = np.repeat(range(df.shape[0]), list_lengths)

    # Positions of every column except the one holding the lists
    kept_columns = []
    for position, column_name in enumerate(df.columns):
        if column_name != list_column:
            kept_columns.append(position)

    # All the items, row after row
    flattened_items = []
    for row_items in df[list_column]:
        flattened_items.extend(row_items)

    result = df.iloc[repeated_rows, kept_columns].copy()
    result[new_column] = flattened_items
    return result.reset_index(drop=True)
# Function to extract the videos
def extract_clips(
    movie_path: str,
    clip_length: int,
    upl_second_i: int,
    output_clip_path: str,
    modification_details: dict,
    gpu_available: bool,
):
    """
    Extract a clip from a movie with ffmpeg, applying the modifications
    provided (if any), and save it to output_clip_path.

    :param movie_path: The path to the movie file
    :param clip_length: The length of the clip in seconds
    :param upl_second_i: The second in the video to start the clip
    :param output_clip_path: The path to the output clip
    :param modification_details: a dictionary of dictionaries, where each dictionary contains the
        details of a modification to be made to the video. The keys of the dictionary are the names of the
        modifications, and the values are dictionaries holding either a "filter" entry or
        "crf"/"bv" settings. May be empty.
    :param gpu_available: If you have a GPU, set this to True. If you don't, set it to False
    :raises ffmpeg_python.Error: when ffmpeg fails on the non-GPU path
    """
    if gpu_available:
        # Same command with and without modifications, except for the bitrate
        command = [
            "ffmpeg",
            "-hwaccel",
            "cuda",
            "-hwaccel_output_format",
            "cuda",
            "-ss",
            str(upl_second_i),
            "-t",
            str(clip_length),
            "-i",
            movie_path,
            "-an",  # removes the audio
            "-c:a",
            "copy",
            "-c:v",
            "h264_nvenc",
        ]

        if modification_details:
            # Unnest the modification detail dict
            df = pd.json_normalize(modification_details, sep="_")
            # NOTE(review): only the "bv" setting is used here; a modification
            # without a "bv" entry (e.g. a filter) makes this lookup raise
            # IndexError - confirm the intended GPU behaviour for filters.
            b_v = df.filter(regex="bv$", axis=1).values[0][0] + "M"
            command += ["-b:v", b_v]

        command.append(str(output_clip_path))

        subprocess.call(command)
        os.chmod(str(output_clip_path), 0o777)

    else:
        # The ffmpeg-python call chain is assembled as text because the
        # filters are stored as text. Paths are embedded through repr() so
        # quotes or backslashes in them cannot break the expression.
        output_target = repr(str(output_clip_path))
        init_prompt = f"ffmpeg_python.input({str(movie_path)!r})"
        def_output_prompt = f".output({output_target}, ss={str(upl_second_i)}, t={str(clip_length)}, crf=20, pix_fmt='yuv420p', vcodec='libx264')"
        mod_prompt = ""
        # Initialised so that no modification, or filter-only modifications,
        # fall back to the default output (this name used to be unbound then)
        output_prompt = ""

        # Set up modification
        for transform in (modification_details or {}).values():
            if "filter" in transform:
                mod_prompt += transform["filter"]
            else:
                # Unnest the modification detail dict
                df = pd.json_normalize(modification_details, sep="_")
                crf = df.filter(regex="crf$", axis=1).values[0][0]
                output_prompt = f".output({output_target}, crf={crf}, ss={str(upl_second_i)}, t={str(clip_length)}, preset='veryfast', pix_fmt='yuv420p', vcodec='libx264')"

        full_prompt = init_prompt + mod_prompt + (output_prompt or def_output_prompt)

        # Run the modification
        try:
            # SECURITY NOTE: eval executes the assembled text as Python code;
            # modification_details must only come from trusted sources.
            eval(full_prompt).run(capture_stdout=True, capture_stderr=True)
            os.chmod(str(output_clip_path), 0o777)
        except ffmpeg_python.Error as e:
            logging.info("stdout: %s", e.stdout.decode("utf8"))
            logging.info("stderr: %s", e.stderr.decode("utf8"))
            raise

    logging.info("Clips extracted successfully")
def create_clips(
    available_movies_df: pd.DataFrame,
    movie_i: str,
    movie_path: str,
    db_info_dict: dict,
    clip_selection,
    project: project_utils.Project,
    modification_details: dict,
    gpu_available: bool,
    pool_size: int = 4,
):
    """
    Extract consecutive clips from a movie into a "<movie_i>_zooniverseclips"
    folder, which is emptied first.

    :param available_movies_df: the dataframe with the movies that are available for the project
    :param movie_i: the name of the movie you want to extract clips from
    :param movie_path: the path to the movie you want to extract clips from
    :param db_info_dict: a dictionary with the database information (not used
        by this function)
    :param clip_selection: an object whose `kwargs` hold "clip_length" and
        "clips_range" (start and end second) and whose `result` is the
        expected number of clips
    :param project: the project object; its `server` attribute decides where
        the clips folder is created
    :param modification_details: a dictionary with the modifications to apply
        to the clips (passed on to extract_clips)
    :param gpu_available: True or False, depending on whether you have a GPU available to use
    :param pool_size: the number of threads to use to extract the clips, defaults to 4 (optional)
    :return: A dataframe with the clip_path, clip_filename, clip_length, upl_seconds, and
        clip_modification_details
    """
    # Filter the df for the movie of interest
    movie_i_df = available_movies_df[
        available_movies_df["filename"] == movie_i
    ].reset_index(drop=True)

    # Read the selection made by the user
    clip_length = clip_selection.kwargs["clip_length"]
    clip_numbers = clip_selection.result
    start_trim = clip_selection.kwargs["clips_range"][0]
    end_trim = clip_selection.kwargs["clips_range"][1]

    # Calculate all the seconds for the new clips to start.
    # Only starts whose clip ends at or before end_trim are kept, which yields
    # exactly int((end_trim - start_trim) / clip_length) clips - the number
    # reported when the range was selected - even when start_trim is not a
    # multiple of clip_length.
    movie_i_df["seconds"] = [
        list(range(start_trim, end_trim - clip_length + 1, clip_length))
    ]

    # Reshape the dataframe with the seconds for the new clips to start on the rows
    potential_start_df = expand_list(movie_i_df, "seconds", "upl_seconds")

    # Specify the length of the clips
    potential_start_df["clip_length"] = clip_length

    if clip_numbers != potential_start_df.shape[0]:
        logging.info(
            f"There was an issue estimating the starting seconds for the {clip_numbers} clips"
        )

    # Get project-specific server info
    server = project.server

    # Specify the temp folder to host the clips
    temp_clip_folder = movie_i + "_zooniverseclips"
    if server == "SNIC":
        clips_folder = os.path.join(
            "/cephyr/NOBACKUP/groups/snic2021-6-9/tmp_dir/", temp_clip_folder
        )
    else:
        clips_folder = temp_clip_folder

    # Set the filename of the clips
    potential_start_df["clip_filename"] = (
        movie_i
        + "_clip_"
        + potential_start_df["upl_seconds"].astype(str)
        + "_"
        + str(clip_length)
        + ".mp4"
    )

    # Set the path of the clips
    potential_start_df["clip_path"] = (
        clips_folder + os.sep + potential_start_df["clip_filename"]
    )

    # Start from an empty folder: remove clips left by a previous run
    if os.path.exists(clips_folder):
        shutil.rmtree(clips_folder)
    os.makedirs(clips_folder)
    os.chmod(str(clips_folder), 0o777)

    logging.info("Extracting clips")

    # Extract the clips in batches of pool_size threads
    for i in range(0, potential_start_df.shape[0], pool_size):
        logging.info(
            f"Modifying {i} to {i+pool_size} out of {potential_start_df.shape[0]}"
        )

        threadlist = []
        for _, row in potential_start_df.iloc[i : i + pool_size].iterrows():
            # Extract the videos and store them in the folder
            t = threading.Thread(
                target=extract_clips,
                args=(
                    movie_path,
                    clip_length,
                    row["upl_seconds"],
                    row["clip_path"],
                    modification_details,
                    gpu_available,
                ),
            )
            threadlist.append(t)
            t.start()

        # Wait for the whole batch before starting the next one
        for tr in threadlist:
            tr.join()

    # Add information on the modification of the clips
    potential_start_df["clip_modification_details"] = str(modification_details)

    return potential_start_df
def set_zoo_metadata(
df: pd.DataFrame, project: project_utils.Project, db_info_dict: dict
):
"""
It takes a dataframe with clips, and adds metadata about the site and project to it
:param df: the dataframe with the clips to upload
:param project: the project object
:param db_info_dict: a dictionary with the following keys:
:return: upload_to_zoo, sitename, created_on
"""
# Create connection to db
conn = db_utils.create_connection(db_info_dict["db_path"])
# Query info about the movie of interest
sitesdf = pd.read_sql_query("SELECT * FROM sites", conn)
# Rename the id column to match movies df
sitesdf = sitesdf.rename(
columns={
"id": "site_id",
}
)
# Combine site info to the df
if "site_id" in df.columns:
upload_to_zoo = df.merge(sitesdf, on="site_id")
sitename = upload_to_zoo["siteName"].unique()[0]
else:
raise ValueError("Sites table empty. Perhaps try to rebuild the initial db.")
# Rename columns to match schema names
# (fields that begin with “#” or “//” will never be shown to volunteers)
# (fields that begin with "!" will only be available for volunteers on the Talk section, after classification)
upload_to_zoo = upload_to_zoo.rename(
columns={
"id": "movie_id",
"created_on": "#created_on",
"clip_length": "#clip_length",
"filename": "#VideoFilename",
"clip_modification_details": "#clip_modification_details",
"siteName": "#siteName",