Skip to content

Commit

Permalink
Add GPU test (#30)
Browse files Browse the repository at this point in the history
Adds GPU test originally built by @robsyme.
  • Loading branch information
adamrtalbot authored Dec 3, 2024
1 parent 379a4fa commit f4821c5
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 1 deletion.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,9 @@ Tests moving the contents of a folder to a new folder within the working directo
### `TEST_VAL_INPUT`

Test a process can accept a value as input.

### `TEST_GPU`

_Note: Enabled only if the parameter `--gpu` is specified._

This process tests the ability to use a GPU. It uses the `pytorch` conda environment to test that CUDA is available and working. It is disabled by default because it requires a GPU, which may not be available in every execution environment.
86 changes: 85 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,92 @@ process TEST_VAL_INPUT {
"""
}

// Tests that a process can see and use a GPU.
// Runs a matrix multiply on CPU and GPU, checks the results agree, and
// fails if the GPU is not faster than the CPU (a proxy for "no GPU used").
// Only wired into the workflow when `--gpu` is set.
process TEST_GPU {

    container 'pytorch/pytorch:latest'
    conda 'pytorch::pytorch=2.5.1 pytorch::torchvision=0.20.1 nvidia::cuda=12.1'
    accelerator 1
    memory '10G'

    input:
    val input

    output:
    stdout

    script:
    """
    #!/usr/bin/env python
    import torch
    import time

    # Report which GPU and CUDA version the process sees (or that none is visible).
    def print_gpu_info():
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            cuda_version = torch.version.cuda
            print(f"GPU: {gpu_name}")
            print(f"CUDA Version: {cuda_version}")
        else:
            print("CUDA is not available on this system.")

    # Square matrix multiply on the CPU.
    def cpu_computation(size):
        x = torch.rand(size, size)
        y = torch.rand(size, size)
        return torch.mm(x, y)

    # Square matrix multiply on the GPU.
    def gpu_computation(size):
        x = torch.rand(size, size, device='cuda')
        y = torch.rand(size, size, device='cuda')
        result = torch.mm(x, y)
        torch.cuda.synchronize()  # CUDA ops are async; wait so timing covers the work
        return result

    print_gpu_info()

    # Size of the square matrices to multiply.
    size = 10000

    # Warm up the GPU before timing: the first CUDA call pays one-time
    # context-initialization costs which would otherwise be charged to the
    # timed run and could make the GPU spuriously appear slower than the CPU.
    gpu_computation(16)

    # Measure time for CPU computation
    start_time = time.time()
    cpu_result = cpu_computation(size)
    cpu_time = time.time() - start_time
    print(f"CPU computation time: {cpu_time:.4f} seconds")

    # Measure time for GPU computation
    start_time = time.time()
    gpu_result = gpu_computation(size)
    gpu_time = time.time() - start_time
    print(f"GPU computation time: {gpu_time:.4f} seconds")

    # Verify the two devices produced the same product. Large float32 matmuls
    # accumulate rounding differences between devices, so compare with
    # tolerances looser than torch.allclose's defaults (rtol=1e-5, atol=1e-8),
    # which would report a false mismatch on a healthy GPU.
    if torch.allclose(cpu_result, gpu_result.cpu(), rtol=1e-3, atol=1e-3):
        print("Results are close enough!")
    else:
        print("Results differ!")

    # A GPU that is slower than the CPU on a 10000x10000 matmul almost
    # certainly means the accelerator was not actually used: fail the test.
    time_difference = cpu_time - gpu_time
    print(f"Time difference (CPU - GPU): {time_difference:.4f} seconds")
    if time_difference < 0:
        raise Exception("GPU is slower than CPU indicating no GPU utilization")
    """
}

workflow NF_CANARY {

main:

Channel.of('dummy')
.set { dummy }

// Create test file on head node
Channel
.of("alpha", "beta", "gamma")
Expand All @@ -281,6 +363,7 @@ workflow NF_CANARY {
TEST_MV_FOLDER_CONTENTS()
TEST_VAL_INPUT("Hello World")

TEST_GPU( dummy.filter { params.gpu } )
// POC of emitting the channel
Channel.empty()
.mix(
Expand All @@ -297,7 +380,8 @@ workflow NF_CANARY {
TEST_PUBLISH_FOLDER.out,
TEST_IGNORED_FAIL.out,
TEST_MV_FILE.out,
TEST_MV_FOLDER_CONTENTS.out
TEST_MV_FOLDER_CONTENTS.out,
TEST_GPU.out
)
.set { ch_out }

Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
params {
skip = ''
gpu = false
run = null
outdir = null
remoteFile = null
Expand Down
4 changes: 4 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
"help_text": "Path to a remote file to use within the pipeline. This mimics a remote set of files such as reference data that may need to be retrieved prior to analysis. By default this is not specified and the test is not ran, add a remote file using standard Nextflow filenaming to pull a file from your storage (e.g. an S3 bucket or shared storage).",
"format": "path"
},
"gpu": {
"type": "boolean",
"description": "Whether to test GPU utilization within a process."
},
"outdir": {
"type": "string",
"format": "directory-path",
Expand Down

0 comments on commit f4821c5

Please sign in to comment.