-
Notifications
You must be signed in to change notification settings - Fork 63
/
Copy pathmanage.py
executable file
·257 lines (220 loc) · 7.04 KB
/
manage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#!/usr/bin/env python
import argparse
import ast
import hashlib
import os
import subprocess
import sys
import uuid
from typing import Optional, Tuple

from huggingface_hub import HfApi
class cd:
    """Temporarily switch the process working directory.

    Used as ``with cd(path): ...``. The directory that was current when the
    block was entered is restored when the block exits, including when the
    body raises.
    """

    def __init__(self, newPath):
        # A leading "~" is expanded once, at construction time.
        self.newPath = os.path.expanduser(newPath)

    def __enter__(self):
        # Remember where we were *before* moving so __exit__ can go back.
        here = os.getcwd()
        self.savedPath = here
        os.chdir(self.newPath)

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Returns None, so an exception raised in the body keeps propagating.
        os.chdir(self.savedPath)
class DockerPopen(subprocess.Popen):
    """``subprocess.Popen`` that stops the child when its ``with`` block exits.

    On exit the child is asked to terminate and given 5 seconds to do so. If
    it is still running after that it is killed, so that the base-class
    cleanup always runs and no child process is left behind.
    """

    def __exit__(self, exc_type, exc_val, traceback):
        self.terminate()
        try:
            self.wait(5)
        except subprocess.TimeoutExpired:
            # The child ignored the termination request. Previously this
            # exception escaped and skipped the base-class cleanup below,
            # leaving the process alive; escalate instead.
            self.kill()
        return super().__exit__(exc_type, exc_val, traceback)
def create_docker(name: str, is_gpu: bool) -> str:
    """Build the docker image for framework ``name`` and return its tag.

    The build context is ``docker_images/<name>`` next to this file. The tag
    is ``<name>:<5 random characters>`` so successive builds do not clash.

    Args:
        name: Sub-directory of ``docker_images`` to build.
        is_gpu: Accepted for interface compatibility; the build itself does
            not currently use it.

    Returns:
        The tag of the freshly built image.

    Raises:
        subprocess.CalledProcessError: If ``docker build`` exits with a
            non-zero status. Previously the failure was ignored and a tag
            for a non-existent image was returned.
    """
    rand = str(uuid.uuid4())[:5]
    tag = f"{name}:{rand}"
    build_context = os.path.join(
        os.path.dirname(os.path.normpath(__file__)), "docker_images", name
    )
    # `cwd=` scopes the directory change to the child process instead of
    # changing the working directory of this whole process.
    subprocess.run(
        ["docker", "build", ".", "-t", tag], cwd=build_context, check=True
    )
    return tag
def resolve_dataset(args, task: str):
    """Fill in missing dataset settings on ``args`` and return them.

    ``args`` is updated in place:

    * ``dataset_config`` is taken from the dataset builder when it is None.
    * ``dataset_column`` is overwritten from every task template of the
      dataset whose ``task`` equals ``task`` (the last match wins).
      NOTE(review): only ``audio_file_path_column`` is read, which looks
      audio-specific - confirm this is intended for other tasks.

    Args:
        args: Namespace carrying ``dataset_name``, ``dataset_config``,
            ``dataset_split``, ``dataset_column`` and ``token``.
        task: Task name used to select a task template.

    Returns:
        ``(dataset_name, dataset_config, dataset_split, dataset_column)``.

    Raises:
        ValueError: If the builder lists its splits and ``dataset_split`` is
            not one of them.
    """
    # Imported lazily so the other sub-commands do not require `datasets`.
    import datasets

    builder = datasets.load_dataset_builder(
        args.dataset_name, use_auth_token=args.token
    )

    if args.dataset_config is None:
        args.dataset_config = builder.config_id
        print(f"Inferred dataset_config {args.dataset_config}")

    known_splits = builder.info.splits
    if known_splits is not None and args.dataset_split not in known_splits:
        choices = ",".join(known_splits.keys())
        raise ValueError(
            f"The split `{args.dataset_split}` is not a valid split, please choose from {choices}"
        )

    templates = builder.info.task_templates
    if templates is None:
        templates = ()
    for template in templates:
        if template.task != task:
            continue
        args.dataset_column = template.audio_file_path_column
        print(f"Inferred dataset_column {args.dataset_column}")

    resolved = (
        args.dataset_name,
        args.dataset_config,
        args.dataset_split,
        args.dataset_column,
    )
    return resolved
def get_repo_name(model_id: str, dataset_name: str) -> str:
    """Return the ``bulk-<model>-<dataset>-<hash>`` name for a model/dataset pair.

    ``<model>`` and ``<dataset>`` are the parts after the last ``/`` of each
    id, truncated to 10 characters. ``<hash>`` is the first 5 hex digits of
    the MD5 of the two full ids concatenated.
    """
    # The digest is computed over the fully qualified ids: the short names
    # below drop the namespace, so the digest is what tells apart ids that
    # differ only in their namespace.
    digest = hashlib.md5((model_id + dataset_name).encode("utf-8")).hexdigest()
    short_model = model_id.rsplit("/", 1)[-1]
    short_dataset = dataset_name.rsplit("/", 1)[-1]
    parts = ("bulk", short_model[:10], short_dataset[:10], digest[:5])
    return "-".join(parts)
def show(args):
    """Print each framework under ``docker_images`` and the tasks it allows.

    For every entry of ``docker_images`` (sorted by name) that contains an
    ``app/main.py``, the entry name is printed followed by the keys of the
    dict literal assigned to the annotated module-level name
    ``ALLOWED_TASKS`` in that file.

    Entries without an ``app/main.py`` (for example a stray file) are
    skipped; previously they aborted the whole listing with an ``OSError``.

    Args:
        args: Parsed command-line arguments; not used.
    """
    directory = os.path.join(
        os.path.dirname(os.path.normpath(__file__)), "docker_images"
    )
    for framework in sorted(os.listdir(directory)):
        local_path = os.path.join(directory, framework, "app", "main.py")
        if not os.path.isfile(local_path):
            continue
        print(f"{framework}")
        # Using ast to prevent import issues with missing dependencies
        # and slow loads.
        with open(local_path, "r", encoding="utf-8") as source:
            tree = ast.parse(source.read())
        for item in tree.body:
            is_allowed_tasks = (
                isinstance(item, ast.AnnAssign)
                # Guard the node types so an unexpected shape is ignored
                # instead of raising AttributeError.
                and isinstance(item.target, ast.Name)
                and item.target.id == "ALLOWED_TASKS"
                and isinstance(item.value, ast.Dict)
            )
            if not is_allowed_tasks:
                continue
            for key in item.value.keys:
                # `**other` entries appear as a None key; only literal keys
                # carry a task name.
                if isinstance(key, ast.Constant):
                    print(" " * 4, key.value)
def resolve(model_id: str) -> Tuple[Optional[str], str]:
    """Look up ``model_id`` on the hub and return its ``(task, framework)``.

    ``task`` is the model's ``pipeline_tag``, returned as reported, so it may
    be None; callers that need a task must check. ``framework`` is the
    model's ``library_name`` with ``-`` replaced by ``_``.

    Args:
        model_id: Hub identifier of the model.

    Returns:
        ``(task, framework)`` as described above.

    Raises:
        ValueError: If the hub lookup fails, if the model info exposes no
            ``pipeline_tag`` attribute, or if ``library_name`` is missing
            or None.
    """
    try:
        info = HfApi().model_info(model_id)
    except Exception as e:
        raise ValueError(
            f"The hub has no information on {model_id}, does it exist: {e}"
        ) from e
    try:
        task = info.pipeline_tag
    except AttributeError as e:
        raise ValueError(
            f"The hub has no `pipeline_tag` on {model_id}, you can set it in the `README.md` yaml header"
        ) from e
    # NOTE(review): the hub client may expose a missing `library_name` as
    # None rather than omitting the attribute - both are handled here.
    # Previously a None value crashed on `.replace` with an AttributeError.
    framework = getattr(info, "library_name", None)
    if framework is None:
        raise ValueError(
            f"The hub has no `library_name` on {model_id}, you can set it in the `README.md` yaml header"
        )
    return task, framework.replace("-", "_")
def resolve_task_framework(args):
    """Return ``(model_id, task, framework)`` for the parsed arguments.

    Values given on the command line are used as-is. The hub is queried
    through ``resolve`` only when ``task`` or ``framework`` is None, and each
    value inferred that way is printed.

    Args:
        args: Namespace with ``model``, ``task`` and ``framework``.

    Returns:
        ``(model_id, task, framework)``.

    Raises:
        ValueError: If the task has to be inferred but the hub reports none.
            Errors raised by ``resolve`` propagate unchanged.
    """
    model_id = args.model
    task = args.task
    framework = args.framework
    if task is None or framework is None:
        rtask, rframework = resolve(model_id)
        if task is None:
            if rtask is None:
                # Fail here with an actionable message instead of passing
                # None on, where it would crash or be sent as "TASK=None".
                raise ValueError(
                    f"The hub has no `pipeline_tag` on {model_id}, you can set it in the `README.md` yaml header or pass `--task`"
                )
            task = rtask
            print(f"Inferred task : {task}")
        if framework is None:
            framework = rframework
            print(f"Inferred framework : {framework}")
    return model_id, task, framework
def start(args):
    """Serve a model locally with uvicorn on ``127.0.0.1:8000``.

    The task and framework are taken from ``args`` or inferred through
    ``resolve_task_framework``. The model id and task are exported as the
    ``MODEL_ID`` and ``TASK`` environment variables, and ``COMPUTE_TYPE`` is
    set to ``gpu`` only when ``args.gpu`` is truthy.

    Args:
        args: Namespace with ``model``, ``task``, ``framework`` and ``gpu``.
    """
    # Imported lazily so the other sub-commands do not require uvicorn.
    import uvicorn

    model_id, task, framework = resolve_task_framework(args)

    here = os.path.dirname(os.path.normpath(__file__))
    framework_dir = os.path.join(here, "docker_images", framework)
    # uvicorn is given the import string "app.main:app" below; the framework
    # directory is put on sys.path, presumably so that import resolves.
    sys.path.append(framework_dir)

    os.environ.update({"MODEL_ID": model_id, "TASK": task})
    if args.gpu:
        os.environ["COMPUTE_TYPE"] = "gpu"

    uvicorn.run("app.main:app", host="127.0.0.1", port=8000, log_level="info")
def docker(args):
    """Build the framework image and run a model inside a docker container.

    The container is started with host port 8000 published to container
    port 80, host ``/tmp`` mounted at ``/data``, and the ``TASK``,
    ``MODEL_ID``, ``COMPUTE_TYPE`` and ``DEBUG`` environment variables set.
    The full command is printed before it is run. The call blocks until the
    container process exits; Ctrl-C terminates it.

    Args:
        args: Namespace with ``model``, ``task``, ``framework`` and ``gpu``.
    """
    model_id, task, framework = resolve_task_framework(args)

    tag = create_docker(framework, is_gpu=args.gpu)

    container_env = (
        f"TASK={task}",
        f"MODEL_ID={model_id}",
        f"COMPUTE_TYPE={'gpu' if args.gpu else 'cpu'}",
        # DEBUG is forwarded from the calling environment, defaulting to 0.
        f"DEBUG={os.getenv('DEBUG', '0')}",
    )

    run_docker_command = ["docker", "run"]
    if args.gpu:
        run_docker_command += ["--gpus", "all"]
    run_docker_command += ["-p", "8000:80"]
    for assignment in container_env:
        run_docker_command += ["-e", assignment]
    run_docker_command += ["-v", "/tmp:/data", "-t", tag]

    print(" ".join(run_docker_command))
    with DockerPopen(run_docker_command) as proc:
        try:
            proc.wait()
        except KeyboardInterrupt:
            proc.terminate()
def _add_model_arguments(subparser, verb: str) -> None:
    """Register the model/task/framework/gpu arguments shared by sub-commands."""
    subparser.add_argument(
        "model",
        type=str,
        help=f"Which model_id to {verb}.",
    )
    subparser.add_argument(
        "--task",
        type=str,
        help="Which task to load",
    )
    subparser.add_argument(
        "--framework",
        type=str,
        help="Which framework to load",
    )
    subparser.add_argument(
        "--gpu",
        action="store_true",
        help="Using gpu ?",
    )


def main():
    """Parse the command line and dispatch to ``start``, ``docker`` or ``show``.

    A sub-command is mandatory: running with none now produces the normal
    argparse usage error instead of
    ``AttributeError: 'Namespace' object has no attribute 'func'``.
    """
    parser = argparse.ArgumentParser()
    # `dest` is needed so argparse can name the missing argument in its
    # error message when `required=True`.
    subparsers = parser.add_subparsers(dest="command", required=True)

    parser_start = subparsers.add_parser(
        "start", help="Start a local version of a model inference"
    )
    _add_model_arguments(parser_start, "start")
    parser_start.set_defaults(func=start)

    parser_docker = subparsers.add_parser(
        "docker", help="Start a docker version of a model inference"
    )
    _add_model_arguments(parser_docker, "docker")
    parser_docker.set_defaults(func=docker)

    parser_show = subparsers.add_parser(
        "show", help="Show dockers and the various pipelines they implement"
    )
    parser_show.set_defaults(func=show)

    args = parser.parse_args()
    args.func(args)
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()