Skip to content

Commit

Permalink
report mode GPU is in and write out gpu UUID (#699)
Browse files Browse the repository at this point in the history
* report mode GPU is in and write out gpu UUID

* check for Exclusive_Process instead of Default

* use try/except to be more robust

* Update openmmtools/multistate/multistatesampler.py

Co-authored-by: Iván Pulido <[email protected]>

* make the subprocess throw an exception if there is an error, fix logic in compute mode detection

* make debug message more helpful

* fix UnboundLocalError: local variable 'cuda_query_output' referenced before assignment

* just check the error code

* warn method has been deprecated since version 3.2

---------

Co-authored-by: EC2 Default User <[email protected]>
Co-authored-by: Iván Pulido <[email protected]>
  • Loading branch information
3 people authored Jun 2, 2023
1 parent 3cf98eb commit abb2f61
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions openmmtools/multistate/multistatesampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import inspect
import logging
import datetime
import subprocess

import numpy as np

Expand Down Expand Up @@ -1773,11 +1774,19 @@ def _update_timing(self, iteration_time, partial_total_time, run_initial_iterati
@staticmethod
def _display_cuda_devices():
    """Log the GPUs reported by ``nvidia-smi`` and warn on unsupported compute modes.

    Runs ``nvidia-smi --query-gpu=gpu_uuid,gpu_name,compute_mode --format=csv``
    once and writes every output line (CSV header included) to the debug log.
    A warning is logged when at least one reported GPU is not in the
    ``Default`` compute mode.

    This is best-effort diagnostics: if ``nvidia-smi`` cannot be started or
    exits with a non-zero status, only a debug message is logged, since that
    is the expected outcome on machines without a GPU.
    """
    # An argument list (no shell) avoids spawning an intermediate shell and
    # any quoting concerns.
    query_command = [
        "nvidia-smi",
        "--query-gpu=gpu_uuid,gpu_name,compute_mode",
        "--format=csv",
    ]
    try:
        cuda_query_output = subprocess.run(query_command, capture_output=True, text=True)
    except OSError as error:
        # Without a shell, a missing executable surfaces as an OSError
        # (e.g. FileNotFoundError) instead of a non-zero return code.
        logger.debug(f"nvidia-smi command failed: {error}, this is expected if there is no GPU available")
        return

    # Handle the case where the command ran but reported an error.
    if cuda_query_output.returncode != 0:
        logger.debug(f"nvidia-smi command failed: {cuda_query_output.stderr}, this is expected if there is no GPU available")
        return

    # One entry per output line: the CSV header followed by one row per GPU.
    cuda_devices_list = cuda_query_output.stdout.splitlines()
    logger.debug(f"CUDA devices available: {*cuda_devices_list,}")

    # compute_mode is the last CSV column; splitting from the right keeps the
    # parse correct even if a GPU name were to contain a comma. Lines without
    # a comma are not device rows and are ignored.
    compute_modes = [
        line.rsplit(",", 1)[1].strip()
        for line in cuda_devices_list
        if "," in line
    ]
    # Drop the CSV header entry so only real devices are inspected.
    compute_modes = [mode for mode in compute_modes if mode != "compute_mode"]

    # We only support "Default" and not "Exclusive_Process" for the compute
    # mode. Check every device individually: a single "Default" GPU must not
    # mask another GPU that is in a restricted mode.
    if any(mode != "Default" for mode in compute_modes):
        logger.warning("GPU in 'Exclusive_Process' mode (or Prohibited), one context is allowed per device. This may prevent some openmmtools features from working. GPU must be in 'Default' compute mode")

def _flatten_moves_iterator(self):
"""Recursively flatten MCMC moves. Handles the cases where each move can be a set of moves, for example with
Expand Down

0 comments on commit abb2f61

Please sign in to comment.