From abb2f615b730c4b42517d2a3da8af1d58e0a7b6c Mon Sep 17 00:00:00 2001
From: Mike Henry <11765982+mikemhenry@users.noreply.github.com>
Date: Fri, 2 Jun 2023 08:31:42 -0700
Subject: [PATCH] report mode GPU is in and write out gpu UUID (#699)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* report mode GPU is in and write out gpu UUID

* check for Exclusive_Process instead of Default

* use try/except to be more robust

* Update openmmtools/multistate/multistatesampler.py

Co-authored-by: Iván Pulido <2949729+ijpulidos@users.noreply.github.com>

* make the subprocess throw an exception if there is an error, fix logic in compute mode detection

* make debug message more helpful

* fix UnboundLocalError: local variable 'cuda_query_output' referenced before assignment

* just check the error code

* warn method has been deprecated since version 3.2

---------

Co-authored-by: EC2 Default User
Co-authored-by: Iván Pulido <2949729+ijpulidos@users.noreply.github.com>
---
 openmmtools/multistate/multistatesampler.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/openmmtools/multistate/multistatesampler.py b/openmmtools/multistate/multistatesampler.py
index 31d49ffa..5f6594cc 100644
--- a/openmmtools/multistate/multistatesampler.py
+++ b/openmmtools/multistate/multistatesampler.py
@@ -35,6 +35,7 @@
 import inspect
 import logging
 import datetime
+import subprocess
 
 import numpy as np
 
@@ -1773,11 +1774,19 @@ def _update_timing(self, iteration_time, partial_total_time, run_initial_iterati
     @staticmethod
     def _display_cuda_devices():
         """Query system nvidia-smi to get available GPUs indices and names in debug log."""
-        # Read nvidia-smi query, should return empty strip if no GPU is found.
-        cuda_query_output = os.popen("nvidia-smi --query-gpu=index,gpu_name --format=csv,noheader").read().strip()
-        # Split by line jump and comma
-        cuda_devices_list = [entry.split(',') for entry in cuda_query_output.split('\n')]
-        logger.debug(f"CUDA devices available: {*cuda_devices_list,}")
+
+        cuda_query_output = subprocess.run("nvidia-smi --query-gpu=gpu_uuid,gpu_name,compute_mode --format=csv", shell=True, capture_output=True, text=True)
+        # Check if command worked
+        if cuda_query_output.returncode == 0:
+            # Split by line jump and comma
+            cuda_devices_list = [entry for entry in cuda_query_output.stdout.splitlines()]
+            logger.debug(f"CUDA devices available: {*cuda_devices_list,}")
+            # We only support "Default" and not "Exclusive_Process" for the compute mode
+            if "Default" not in cuda_query_output.stdout:
+                logger.warning(f"GPU in 'Exclusive_Process' mode (or Prohibited), one context is allowed per device. This may prevent some openmmtools features from working. GPU must be in 'Default' compute mode")
+        # Handel the case where the command had some error
+        else:
+            logger.debug(f"nvidia-smi command failed: {cuda_query_output.stderr}, this is expected if there is no GPU available")
 
     def _flatten_moves_iterator(self):
         """Recursively flatten MCMC moves. Handles the cases where each move can be a set of moves, for example with
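
Editor's note: for readers who want to try the GPU-detection logic outside the sampler, the following is a minimal standalone sketch of the approach this patch introduces. It assumes nvidia-smi is available on the PATH; the logger name and the function name display_cuda_devices are illustrative and not part of the patch itself.

# A minimal, standalone sketch of the detection logic added by this patch.
# Assumption: nvidia-smi is on PATH; logger and function names are illustrative.
import logging
import subprocess

logger = logging.getLogger("cuda_device_check")


def display_cuda_devices():
    """Log available CUDA devices (UUID, name, compute mode) via nvidia-smi."""
    result = subprocess.run(
        "nvidia-smi --query-gpu=gpu_uuid,gpu_name,compute_mode --format=csv",
        shell=True, capture_output=True, text=True,
    )
    if result.returncode == 0:
        # One CSV row per device (plus a header row): uuid, name, compute mode.
        devices = result.stdout.splitlines()
        logger.debug("CUDA devices available: %s", devices)
        # Only the 'Default' compute mode allows multiple contexts per device.
        if "Default" not in result.stdout:
            logger.warning(
                "GPU is in 'Exclusive_Process' (or 'Prohibited') compute mode; "
                "only one context is allowed per device, which may prevent some "
                "openmmtools features from working. Set the GPU to 'Default' mode."
            )
    else:
        # Expected on machines without an NVIDIA GPU or without nvidia-smi installed.
        logger.debug("nvidia-smi command failed: %s", result.stderr)


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    display_cuda_devices()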