Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Enable selection of GPU on inference page #1511

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion digits/inference/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class InferenceJob(Job):
A Job that exercises the forward pass of a neural network
"""

def __init__(self, model, images, epoch, layers, resize=True, **kwargs):
def __init__(self, model, images, epoch, layers, resize=True, gpu=None, **kwargs):
"""
Arguments:
model -- job object associated with model to perform inference on
Expand All @@ -40,6 +40,7 @@ def __init__(self, model, images, epoch, layers, resize=True, **kwargs):
epoch=epoch,
layers=layers,
resize=resize,
gpu=gpu
))

@override
Expand Down
6 changes: 4 additions & 2 deletions digits/inference/tasks/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class InferenceTask(Task):
A task for inference jobs
"""

def __init__(self, model, images, epoch, layers, resize, **kwargs):
def __init__(self, model, images, epoch, layers, resize, gpu=None, **kwargs):
"""
Arguments:
model -- trained model to perform inference on
Expand All @@ -40,7 +40,7 @@ def __init__(self, model, images, epoch, layers, resize, **kwargs):
self.inference_log_file = "inference.log"

# resources
self.gpu = None
self.gpu = gpu

# generated data
self.inference_data_filename = None
Expand Down Expand Up @@ -180,6 +180,8 @@ def offer_resources(self, resources):
if resources[gpu_key]:
for resource in resources[gpu_key]:
if resource.remaining() >= 1:
if self.gpu is not None and self.gpu != int(resource.identifier):
continue
self.gpu = int(resource.identifier)
reserved_resources[gpu_key] = [(resource.identifier, 1)]
break
Expand Down
37 changes: 15 additions & 22 deletions digits/model/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,37 +305,30 @@ def validate_custom_network_snapshot(form, field):
if filename and not os.path.exists(filename):
raise validators.ValidationError('File "%s" does not exist' % filename)

# List of GPUs, reused as the choices for the select_gpu and select_gpus
# fields. Each entry is an (index, label) pair, one per non-empty item of the
# comma-separated 'gpu_list' config value.
gpu_list = [(
index,
# Label format: "#<index> - <device name> (<total memory> memory)"
'#%s - %s (%s memory)' % (
index,
get_device(index).name,
sizeof_fmt(
# Use the total memory from get_nvml_info() when it returns a truthy
# result containing a 'memory' entry; otherwise fall back to the
# device's totalGlobalMem.
# NOTE(review): get_nvml_info(index) is called up to three times per
# GPU here -- confirm it is cheap or cached.
get_nvml_info(index)['memory']['total']
if get_nvml_info(index) and 'memory' in get_nvml_info(index)
else get_device(index).totalGlobalMem)
),
) for index in config_value('gpu_list').split(',') if index]

# Select one of several GPUs
select_gpu = wtforms.RadioField(
'Select which GPU you would like to use',
choices=[('next', 'Next available')] + [(
index,
'#%s - %s (%s memory)' % (
index,
get_device(index).name,
sizeof_fmt(
get_nvml_info(index)['memory']['total']
if get_nvml_info(index) and 'memory' in get_nvml_info(index)
else get_device(index).totalGlobalMem)
),
) for index in config_value('gpu_list').split(',') if index],
choices=[('next', 'Next available')] + gpu_list,
default='next',
)

# Select N of several GPUs
select_gpus = utils.forms.SelectMultipleField(
'Select which GPU[s] you would like to use',
choices=[(
index,
'#%s - %s (%s memory)' % (
index,
get_device(index).name,
sizeof_fmt(
get_nvml_info(index)['memory']['total']
if get_nvml_info(index) and 'memory' in get_nvml_info(index)
else get_device(index).totalGlobalMem)
),
) for index in config_value('gpu_list').split(',') if index],
choices=gpu_list,
tooltip="The job won't start until all of the chosen GPUs are available."
)

Expand Down
19 changes: 14 additions & 5 deletions digits/model/images/classification/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from digits.pretrained_model.job import PretrainedModelJob
from digits.status import Status
from digits.utils import filesystem as fs
from digits.utils.forms import fill_form_if_cloned, save_form_to_job
from digits.utils.forms import fill_form_if_cloned, save_form_to_job, get_selected_gpu
from digits.utils.routing import request_wants_json, job_from_request
from digits.webapp import scheduler

Expand Down Expand Up @@ -333,14 +333,16 @@ def show(job, related_jobs=None):
"""
Called from digits.model.views.models_show()
"""
form = ImageClassificationModelForm()
return flask.render_template(
'models/images/classification/show.html',
job=job,
framework_ids=[
fw.get_id()
for fw in frameworks.get_frameworks()
],
related_jobs=related_jobs
related_jobs=related_jobs,
gpu_list=form.gpu_list,
)


Expand Down Expand Up @@ -384,14 +386,17 @@ def classify_one():
if 'show_visualizations' in flask.request.form and flask.request.form['show_visualizations']:
layers = 'all'

selected_gpu = get_selected_gpu(flask.request.form)

# create inference job
inference_job = ImageInferenceJob(
username=utils.auth.get_username(),
name="Classify One Image",
model=model_job,
images=[image_path],
epoch=epoch,
layers=layers
layers=layers,
gpu=selected_gpu
)

# schedule tasks
Expand Down Expand Up @@ -477,6 +482,7 @@ def classify_many():
epoch = float(flask.request.form['snapshot_epoch'])

paths, ground_truths = read_image_list(image_list, image_folder, num_test_images)
selected_gpu = get_selected_gpu(flask.request.form)

# create inference job
inference_job = ImageInferenceJob(
Expand All @@ -485,7 +491,8 @@ def classify_many():
model=model_job,
images=paths,
epoch=epoch,
layers='none'
layers='none',
gpu=selected_gpu
)

# schedule tasks
Expand Down Expand Up @@ -633,6 +640,7 @@ def top_n():
num_test_images = None

paths, _ = read_image_list(image_list, image_folder, num_test_images)
selected_gpu = get_selected_gpu(flask.request.form)

# create inference job
inference_job = ImageInferenceJob(
Expand All @@ -641,7 +649,8 @@ def top_n():
model=model_job,
images=paths,
epoch=epoch,
layers='none'
layers='none',
gpu=selected_gpu
)

# schedule tasks
Expand Down
16 changes: 15 additions & 1 deletion digits/model/images/generic/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from digits.status import Status
from digits.utils import filesystem as fs
from digits.utils import constants
from digits.utils.forms import fill_form_if_cloned, save_form_to_job
from digits.utils.forms import fill_form_if_cloned, save_form_to_job, get_selected_gpu
from digits.utils.routing import request_wants_json, job_from_request
from digits.webapp import scheduler

Expand Down Expand Up @@ -309,12 +309,14 @@ def show(job, related_jobs=None):
template, context = extension.get_inference_template(form)
inference_form_html = flask.render_template_string(template, **context)

generic_form = GenericImageModelForm()
return flask.render_template(
'models/images/generic/show.html',
job=job,
view_extensions=view_extensions,
related_jobs=related_jobs,
inference_form_html=inference_form_html,
gpu_list=generic_form.gpu_list,
)


Expand Down Expand Up @@ -361,6 +363,8 @@ def infer_one():
else:
resize = True

selected_gpu = get_selected_gpu(flask.request.form)

# create inference job
inference_job = ImageInferenceJob(
username=utils.auth.get_username(),
Expand All @@ -370,6 +374,7 @@ def infer_one():
epoch=epoch,
layers=layers,
resize=resize,
gpu=selected_gpu
)

# schedule tasks
Expand Down Expand Up @@ -446,6 +451,8 @@ def infer_extension():
if 'show_visualizations' in flask.request.form and flask.request.form['show_visualizations']:
layers = 'all'

selected_gpu = get_selected_gpu(flask.request.form)

# create inference job
inference_job = ImageInferenceJob(
username=utils.auth.get_username(),
Expand All @@ -455,6 +462,7 @@ def infer_extension():
epoch=epoch,
layers=layers,
resize=False,
gpu=selected_gpu
)

# schedule tasks
Expand Down Expand Up @@ -539,6 +547,8 @@ def infer_db():
else:
resize = True

selected_gpu = get_selected_gpu(flask.request.form)

# create inference job
inference_job = ImageInferenceJob(
username=utils.auth.get_username(),
Expand All @@ -548,6 +558,7 @@ def infer_db():
epoch=epoch,
layers='none',
resize=resize,
gpu=selected_gpu
)

# schedule tasks
Expand Down Expand Up @@ -633,6 +644,8 @@ def infer_many():
else:
resize = True

selected_gpu = get_selected_gpu(flask.request.form)

paths = []

for line in image_list.readlines():
Expand Down Expand Up @@ -664,6 +677,7 @@ def infer_many():
epoch=epoch,
layers='none',
resize=resize,
gpu=selected_gpu
)

# schedule tasks
Expand Down
43 changes: 37 additions & 6 deletions digits/templates/models/images/classification/show.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<script src="{{ url_for('static', filename='js/model-graphs.js', ver=dir_hash) }}"></script>

{% set task = job.train_task() %}
{% set show_multi_gpu_form = gpu_list| length > 1 %}

<div class="row">
<div class="col-sm-6">
Expand Down Expand Up @@ -102,13 +103,9 @@ <h4 class='text-center'>Dataset</h4>
{% endif %}
>
<h2>Trained Models</h2>
<div class="row">
<div class="col-sm-12">
<label for="snapshot_epoch">Select Model</label>
</div>
</div>
<div class="row">
<div class="col-sm-6">
<label for="snapshot_epoch">Select Model</label>
<div class="form-group">
<select id="snapshot_epoch" name="snapshot_epoch" class="form-control">
</select>
Expand Down Expand Up @@ -139,9 +136,41 @@ <h2>Trained Models</h2>
updateSnapshotList({% autoescape false %}{{task.snapshot_list()}}{% endautoescape %});
</script>
</div>
{% if show_multi_gpu_form %}
<button
formaction="{{url_for('digits.model.views.download', job_id=job.id())}}"
formmethod="post"
formenctype="multipart/form-data"
class="btn btn-info">
Download Model
</button>
<button
formaction="{{url_for('digits.model.views.to_pretrained', job_id=job.id())}}"
formmethod="post"
formenctype="multipart/form-data"
class="btn btn-success">
Make Pretrained Model
</button>
{% endif %}
</div>
{% if show_multi_gpu_form %}
<div class="col-sm-6">
<div class="form-group">
<label for="select_one_of_gpus">Select which GPU you would like to use</label>
<select class="form-control" id="select_one_of_gpus" name="select_one_of_gpus" size="4">
<option selected="" value="next">Next available</option>
{% for gpu_id, gpu_description in gpu_list %}
<option value="{{ gpu_id }}">{{ gpu_description }}</option>
{% endfor %}
</select>
</div>
</div>
{% else %}
<div class="col-sm-6">
<button
<label for="empty space">&nbsp;</label>
</div>
<div class="col-sm-6">
<button
formaction="{{url_for('digits.model.views.download', job_id=job.id())}}"
formmethod="post"
formenctype="multipart/form-data"
Expand All @@ -156,6 +185,7 @@ <h2>Trained Models</h2>
Make Pretrained Model
</button>
</div>
{% endif %}
</div>
{% if task.get_framework_id() in framework_ids %}
<div class="row">
Expand Down Expand Up @@ -184,6 +214,7 @@ <h3>Test a single image</h3>
</div>
</div>
</div>

<script type="text/javascript">
// When you fill in one field, the other gets blanked out
$("#image_path").change(function() { $("#image_file").val(""); });
Expand Down
Loading