Skip to content

Commit

Permalink
Add GPU test (#30)
Browse files Browse the repository at this point in the history
Adds GPU test originally built by @robsyme.
  • Loading branch information
adamrtalbot authored Dec 3, 2024
1 parent 379a4fa commit f4821c5
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 1 deletion.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,9 @@ Tests moving the contents of a folder to a new folder within the working directo
### `TEST_VAL_INPUT`

Test a process can accept a value as input.

### `TEST_GPU`

_Note: Enabled only if the parameter `--gpu` is specified._

This process tests the ability to use a GPU. It uses the `pytorch` conda environment to test that CUDA is available and working. It is disabled by default because it requires a GPU, which may not be available in every execution environment.
86 changes: 85 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,92 @@ process TEST_VAL_INPUT {
"""
}

// Tests that a process can see and use a GPU.
// Runs a matrix multiply on CPU and GPU, checks the results agree, and
// fails if the GPU is not faster than the CPU (a proxy for "no GPU used").
// Only wired into the workflow when `--gpu` is set.
process TEST_GPU {

    container 'pytorch/pytorch:latest'
    conda 'pytorch::pytorch=2.5.1 pytorch::torchvision=0.20.1 nvidia::cuda=12.1'
    accelerator 1
    memory '10G'

    input:
    val input

    output:
    stdout

    script:
    """
    #!/usr/bin/env python
    import torch
    import time

    # Report which GPU and CUDA version the process sees (or that none is visible).
    def print_gpu_info():
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            cuda_version = torch.version.cuda
            print(f"GPU: {gpu_name}")
            print(f"CUDA Version: {cuda_version}")
        else:
            print("CUDA is not available on this system.")

    # Square matrix multiply on the CPU.
    def cpu_computation(size):
        x = torch.rand(size, size)
        y = torch.rand(size, size)
        return torch.mm(x, y)

    # Square matrix multiply on the GPU.
    def gpu_computation(size):
        x = torch.rand(size, size, device='cuda')
        y = torch.rand(size, size, device='cuda')
        result = torch.mm(x, y)
        torch.cuda.synchronize()  # CUDA ops are async; wait so timing covers the work
        return result

    print_gpu_info()

    # Size of the square matrices to multiply.
    size = 10000

    # Warm up the GPU before timing: the first CUDA call pays one-time
    # context-initialization costs which would otherwise be charged to the
    # timed run and could make the GPU spuriously appear slower than the CPU.
    gpu_computation(16)

    # Measure time for CPU computation
    start_time = time.time()
    cpu_result = cpu_computation(size)
    cpu_time = time.time() - start_time
    print(f"CPU computation time: {cpu_time:.4f} seconds")

    # Measure time for GPU computation
    start_time = time.time()
    gpu_result = gpu_computation(size)
    gpu_time = time.time() - start_time
    print(f"GPU computation time: {gpu_time:.4f} seconds")

    # Verify the two devices produced the same product. Large float32 matmuls
    # accumulate rounding differences between devices, so compare with
    # tolerances looser than torch.allclose's defaults (rtol=1e-5, atol=1e-8),
    # which would report a false mismatch on a healthy GPU.
    if torch.allclose(cpu_result, gpu_result.cpu(), rtol=1e-3, atol=1e-3):
        print("Results are close enough!")
    else:
        print("Results differ!")

    # A GPU that is slower than the CPU on a 10000x10000 matmul almost
    # certainly means the accelerator was not actually used: fail the test.
    time_difference = cpu_time - gpu_time
    print(f"Time difference (CPU - GPU): {time_difference:.4f} seconds")
    if time_difference < 0:
        raise Exception("GPU is slower than CPU indicating no GPU utilization")
    """
}

workflow NF_CANARY {

main:

Channel.of('dummy')
.set { dummy }

// Create test file on head node
Channel
.of("alpha", "beta", "gamma")
Expand All @@ -281,6 +363,7 @@ workflow NF_CANARY {
TEST_MV_FOLDER_CONTENTS()
TEST_VAL_INPUT("Hello World")

TEST_GPU( dummy.filter { params.gpu } )
// POC of emitting the channel
Channel.empty()
.mix(
Expand All @@ -297,7 +380,8 @@ workflow NF_CANARY {
TEST_PUBLISH_FOLDER.out,
TEST_IGNORED_FAIL.out,
TEST_MV_FILE.out,
TEST_MV_FOLDER_CONTENTS.out
TEST_MV_FOLDER_CONTENTS.out,
TEST_GPU.out
)
.set { ch_out }

Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
params {
skip = ''
gpu = false
run = null
outdir = null
remoteFile = null
Expand Down
4 changes: 4 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
"help_text": "Path to a remote file to use within the pipeline. This mimics a remote set of files such as reference data that may need to be retrieved prior to analysis. By default this is not specified and the test is not ran, add a remote file using standard Nextflow filenaming to pull a file from your storage (e.g. an S3 bucket or shared storage).",
"format": "path"
},
"gpu": {
"type": "boolean",
"description": "Whether to test GPU utilization within a process."
},
"outdir": {
"type": "string",
"format": "directory-path",
Expand Down

0 comments on commit f4821c5

Please sign in to comment.