Define "GPU" as a worker resource #1401

Open · wants to merge 2 commits into base: branch-24.12
12 changes: 8 additions & 4 deletions dask_cuda/cuda_worker.py
@@ -6,7 +6,7 @@
import os
import warnings

-from toolz import valmap
+from toolz import itemfilter, valmap

import dask
from distributed import Nanny
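
For reference, a quick illustration (not part of this diff) of the two toolz helpers imported above: valmap applies a function to every value of a mapping, and itemfilter keeps the entries whose (key, value) pair satisfies a predicate. The sample dictionary is made up for demonstration.

from toolz import itemfilter, valmap

d = {"GPU": "2", "MEMORY": "100e9"}

# valmap maps a function over the values of a dict.
print(valmap(float, d))  # {'GPU': 2.0, 'MEMORY': 100000000000.0}

# itemfilter keeps items whose (key, value) tuple satisfies the predicate.
print(itemfilter(lambda kv: kv[0] == "GPU", d))  # {'GPU': '2'}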
@@ -103,11 +103,15 @@ def del_pid_file():
            atexit.register(del_pid_file)

        if resources:
-            resources = resources.replace(",", " ").split()
+            resources = resources.replace(",", " ").replace("'", "").split()
            resources = dict(pair.split("=") for pair in resources)
-            resources = valmap(float, resources)
+            gpu_resources = valmap(int, itemfilter(lambda x: x != "GPU", resources))
+            resources = valmap(float, itemfilter(lambda x: x == "GPU", resources))
Member Author commented:
@jacobtomlinson you wrote this section originally. I presume mapping values to float was done to support a definition such as "MEMORY", but since I don't see any tests or any other explicit mention of it in Dask-CUDA, could you confirm whether that's right, and whether you can think of a more robust way to handle types here than the "GPU" versus not-"GPU" split I wrote above?

Member replied:
Honestly, I don't remember the reasoning. I think Dask handles these values as floats; as you say, it's to support values like MEMORY or other arbitrary quantities.
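
For context, a minimal sketch (not part of this PR) of how Dask's generic worker resources behave: names such as "MEMORY" are arbitrary user-chosen labels, amounts are floats, and a task that requests some of a resource only runs on a worker with enough of it unclaimed. The cluster size and quantities below are illustrative assumptions.

from dask.distributed import Client, LocalCluster

if __name__ == "__main__":
    # Each worker advertises an abstract, float-valued "MEMORY" resource.
    cluster = LocalCluster(n_workers=2, threads_per_worker=1, resources={"MEMORY": 100e9})
    client = Client(cluster)

    def load_chunk(i):
        return i

    # The scheduler will only place this task on a worker with 70e9 of
    # "MEMORY" not already claimed by other running tasks.
    future = client.submit(load_chunk, 0, resources={"MEMORY": 70e9})
    print(future.result())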

+            resources.update(gpu_resources)
+            if "GPU" not in resources:
+                resources["GPU"] = 1
        else:
-            resources = None
+            resources = {"GPU": 1}

        preload_argv = kwargs.pop("preload_argv", [])
        kwargs = {"worker_port": None, "listen_address": None, **kwargs}
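The intended effect of the parsing above, shown as a hypothetical standalone helper (parse_resources is not part of the PR; it uses plain dict comprehensions keyed on the resource name instead of toolz): "GPU" amounts become ints and default to 1, while any other resource stays a float.

def parse_resources(spec):
    # e.g. spec = "'GPU=2,MEMORY=100e9'" as passed via --resources
    pairs = spec.replace(",", " ").replace("'", "").split()
    raw = dict(pair.split("=") for pair in pairs)
    resources = {k: (int(v) if k == "GPU" else float(v)) for k, v in raw.items()}
    resources.setdefault("GPU", 1)  # ensure every worker exposes a GPU resource
    return resources

print(parse_resources("GPU=2,MEMORY=100e9"))  # {'GPU': 2, 'MEMORY': 100000000000.0}
print(parse_resources("MEMORY=100e9"))        # {'MEMORY': 100000000000.0, 'GPU': 1}
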
24 changes: 15 additions & 9 deletions dask_cuda/local_cuda_cluster.py
@@ -23,17 +23,17 @@


class LoggedWorker(Worker):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, **worker_kwargs):
+        super().__init__(*args, **worker_kwargs)

    async def start(self):
        await super().start()
        self.data.set_address(self.address)


class LoggedNanny(Nanny):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, worker_class=LoggedWorker, **kwargs)
+    def __init__(self, *args, **worker_kwargs):
+        super().__init__(*args, worker_class=LoggedWorker, **worker_kwargs)


class LocalCUDACluster(LocalCluster):
@@ -244,7 +244,7 @@ def __init__(
        log_spilling=False,
        worker_class=None,
        pre_import=None,
-        **kwargs,
+        **worker_kwargs,
    ):
        # Required by RAPIDS libraries (e.g., cuDF) to ensure no context
        # initialization happens before we can set CUDA_VISIBLE_DEVICES
@@ -326,7 +326,7 @@
        self.rmm_log_directory = rmm_log_directory
        self.rmm_track_allocations = rmm_track_allocations

-        if not kwargs.pop("processes", True):
+        if not worker_kwargs.pop("processes", True):
            raise ValueError(
                "Processes are necessary in order to use multiple GPUs with Dask"
            )
@@ -337,7 +337,7 @@

        if jit_unspill is None:
            jit_unspill = dask.config.get("jit-unspill", default=False)
-        data = kwargs.pop("data", None)
+        data = worker_kwargs.pop("data", None)
        if data is None:
            if device_memory_limit is None and memory_limit is None:
                data = {}
@@ -375,7 +375,7 @@
"protocol='ucxx'"
)

self.host = kwargs.get("host", None)
self.host = worker_kwargs.get("host", None)

initialize(
create_cuda_context=False,
@@ -399,6 +399,12 @@

        self.pre_import = pre_import

+        if "resources" in worker_kwargs:
+            if "GPU" not in worker_kwargs["resources"]:
+                worker_kwargs["GPU"] = 1
+        else:
+            worker_kwargs["resources"] = {"GPU": 1}
rjzamora marked this conversation as resolved.

        super().__init__(
            n_workers=0,
            threads_per_worker=threads_per_worker,
@@ -416,7 +422,7 @@
                    enable_rdmacm=enable_rdmacm,
                )
            },
-            **kwargs,
+            **worker_kwargs,
        )

        self.new_spec["options"]["preload"] = self.new_spec["options"].get(
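A minimal usage sketch of what the new default enables (not taken from the PR or its tests; the task function and counts are assumptions): because every Dask-CUDA worker now advertises resources={"GPU": 1}, a task that requests one "GPU" is limited to one concurrent instance per worker, regardless of the worker's thread count.

from dask.distributed import Client
from dask_cuda import LocalCUDACluster

if __name__ == "__main__":
    cluster = LocalCUDACluster()  # workers default to resources={"GPU": 1}
    client = Client(cluster)

    def gpu_task(x):
        # stand-in for work that needs exclusive use of the GPU
        return x * 2

    # At most one of these runs on a given worker at a time, because each
    # instance claims the worker's single "GPU" resource unit.
    futures = [client.submit(gpu_task, i, resources={"GPU": 1}) for i in range(4)]
    print(client.gather(futures))
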
42 changes: 42 additions & 0 deletions dask_cuda/tests/test_dask_cuda_worker.py
@@ -594,3 +594,45 @@ def test_worker_cudf_spill_warning(enable_cudf_spill_warning): # noqa: F811
assert b"UserWarning: cuDF spilling is enabled" in ret.stderr
else:
assert b"UserWarning: cuDF spilling is enabled" not in ret.stderr


def test_worker_gpu_resource(loop): # noqa: F811
with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
with popen(
[
"dask",
"cuda",
"worker",
"127.0.0.1:9369",
"--no-dashboard",
]
):
with Client("127.0.0.1:9369", loop=loop) as client:
assert wait_workers(client, n_gpus=get_n_gpus())

workers = client.scheduler_info()["workers"]
for v in workers.values():
assert "GPU" in v["resources"]
assert v["resources"]["GPU"] == 1


def test_worker_gpu_resource_user_defined(loop): # noqa: F811
with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
with popen(
[
"dask",
"cuda",
"worker",
"127.0.0.1:9369",
"--resources",
"'GPU=55'",
"--no-dashboard",
]
):
with Client("127.0.0.1:9369", loop=loop) as client:
assert wait_workers(client, n_gpus=get_n_gpus())

workers = client.scheduler_info()["workers"]
for v in workers.values():
assert "GPU" in v["resources"]
assert v["resources"]["GPU"] == 55
20 changes: 20 additions & 0 deletions dask_cuda/tests/test_local_cuda_cluster.py
@@ -217,6 +217,26 @@ async def test_all_to_all():
        assert all(all_data.count(i) == n_workers for i in all_data)


@gen_test(timeout=20)
async def test_worker_gpu_resource():
    async with LocalCUDACluster(asynchronous=True) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            workers = client.scheduler_info()["workers"]
            for v in workers.values():
                assert "GPU" in v["resources"]
                assert v["resources"]["GPU"] == 1


@gen_test(timeout=20)
async def test_worker_gpu_resource_user_defined():
    async with LocalCUDACluster(asynchronous=True, resources={"GPU": 55}) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            workers = client.scheduler_info()["workers"]
            for v in workers.values():
                assert "GPU" in v["resources"]
                assert v["resources"]["GPU"] == 55


@gen_test(timeout=20)
async def test_rmm_pool():
    rmm = pytest.importorskip("rmm")