forked from ocean-data-factory-sweden/kso-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
t11_utils.py
214 lines (175 loc) · 7.46 KB
/
t11_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# base imports
import pandas as pd
import datetime
import os
import subprocess
import logging
# widget imports
from IPython.display import display
from ipywidgets import Layout
import ipywidgets as widgets
# util imports
import kso_utils.movie_utils as movie_utils
import kso_utils.spyfish_utils as spyfish_utils
import kso_utils.server_utils as server_utils
import kso_utils.project_utils as project_utils
# Logging
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)
def check_movies_from_server(db_info_dict: dict, project: project_utils.Project):
"""
It takes in a dataframe with movies information and a dictionary with the database information, and
returns two dataframes: one with the movies that are missing from the server, and one with the
movies that are missing from the csv
:param db_info_dict: a dictionary with the following keys:
:param project: the project object
"""
# Load the csv with movies information
movies_df = pd.read_csv(db_info_dict["local_movies_csv"])
# Check if the project is the Spyfish Aotearoa
if project.Project_name == "Spyfish_Aotearoa":
# Retrieve movies that are missing info in the movies.csv
missing_info = spyfish_utils.check_spyfish_movies(movies_df, db_info_dict)
# Find out files missing from the Server
missing_from_server = missing_info[missing_info["_merge"] == "left_only"]
logging.info("There are", len(missing_from_server.index), "movies missing")
# Find out files missing from the csv
missing_from_csv = missing_info[missing_info["_merge"] == "right_only"].reset_index(
drop=True
)
logging.info(
"There are", len(missing_from_csv.index), "movies missing from movies.csv"
)
# print("There are", len(missing_from_csv.index), "movies missing from movies.csv. Their filenames are:")
# print(*missing_from_csv.filename.unique(), sep = "\n")
return missing_from_server, missing_from_csv
def select_deployment(missing_from_csv: pd.DataFrame):
"""
> This function takes a dataframe of missing files and returns a widget that allows the user to
select the deployment of interest
:param missing_from_csv: a dataframe of the files that are in the data folder but not in the csv
file
:return: A widget object
"""
if missing_from_csv.shape[0] > 0:
# Widget to select the deployment of interest
deployment_widget = widgets.SelectMultiple(
options=missing_from_csv.deployment_folder.unique(),
description="New deployment:",
disabled=False,
rows=10,
layout=Layout(width="80%"),
style={"description_width": "initial"},
)
display(deployment_widget)
return deployment_widget
def select_eventdate():
"""
> This function creates a date picker widget that allows the user to select a date.
The function is called `select_eventdate()` and it returns a date picker widget.
:return: The date widget
"""
# Select the date
date_widget = widgets.DatePicker(
description="Date of deployment:",
value=datetime.date.today(),
disabled=False,
layout=Layout(width="50%"),
style={"description_width": "initial"},
)
display(date_widget)
return date_widget
def update_new_deployments(
deployment_selected, db_info_dict: dict, event_date: widgets.Widget
):
"""
> The function `update_new_deployments` takes a list of deployments, a dictionary with the database
information, and an event date and concatenates the movies inside each deployment
:param deployment_selected: the deployment you want to concatenate
:param db_info_dict: a dictionary with the following keys:
:type db_info_dict: dict
:param event_date: the date of the event you want to concatenate
"""
for deployment_i in deployment_selected.value:
logging.info(
f"Starting to concatenate {deployment_i} out of {len(deployment_selected.value)} deployments selected"
)
# Get a dataframe of movies from the deployment
movies_s3_pd = server_utils.get_matching_s3_keys(
db_info_dict["client"],
db_info_dict["bucket"],
prefix=deployment_i,
suffix=movie_utils.get_movie_extensions(),
)
# Create a list of the list of movies inside the deployment selected
movie_files_server = movies_s3_pd.Key.unique().tolist()
if len(movie_files_server) < 2:
logging.info(
f"Deployment {deployment_i} will not be concatenated because it only has {movies_s3_pd.Key.unique()}"
)
else:
# Concatenate the files if multiple
logging.info("The files", movie_files_server, "will be concatenated")
# Start text file and list to keep track of the videos to concatenate
textfile_name = "a_file.txt"
textfile = open(textfile_name, "w")
video_list = []
for movie_i in sorted(movie_files_server):
# Specify the temporary output of the go pro file
movie_i_output = movie_i.split("/")[-1]
# Download the files from the S3 bucket
if not os.path.exists(movie_i_output):
server_utils.download_object_from_s3(
client=db_info_dict["client"],
bucket=db_info_dict["bucket"],
key=movie_i,
filename=movie_i_output,
)
# Keep track of the videos to concatenate
textfile.write("file '" + movie_i_output + "'" + "\n")
video_list.append(movie_i_output)
textfile.close()
# Save eventdate as str
EventDate_str = event_date.value.strftime("%d_%m_%Y")
# Specify the name of the concatenated video
filename = deployment_i.split("/")[-1] + "_" + EventDate_str + ".MP4"
# Concatenate the files
if not os.path.exists(filename):
logging.info("Concatenating ", filename)
# Concatenate the videos
subprocess.call(
[
"ffmpeg",
"-f",
"concat",
"-safe",
"0",
"-i",
"a_file.txt",
"-c",
"copy",
filename,
]
)
# Upload the concatenated video to the S3
server_utils.upload_file_to_s3(
db_info_dict["client"],
bucket=db_info_dict["bucket"],
key=deployment_i + "/" + filename,
filename=filename,
)
logging.info(filename, "successfully uploaded to", deployment_i)
# Delete the raw videos downloaded from the S3 bucket
for f in video_list:
os.remove(f)
# Delete the text file
os.remove(textfile_name)
# Delete the concat video
os.remove(filename)
# Delete the movies from the S3 bucket
for movie_i in sorted(movie_files_server):
server_utils.delete_file_from_s3(
client=db_info_dict["client"],
bucket=db_info_dict["bucket"],
key=movie_i,
)