From 3507cd1dc533c374d8d2c81048601c20e7a0babb Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Mon, 25 Nov 2024 06:26:36 +0000
Subject: [PATCH] Done

Signed-off-by: Jee Jee Li
---
 vllm/plugins/__init__.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/vllm/plugins/__init__.py b/vllm/plugins/__init__.py
index d5056b18fe968..6836da698b831 100644
--- a/vllm/plugins/__init__.py
+++ b/vllm/plugins/__init__.py
@@ -14,6 +14,14 @@
 plugins_loaded = False
 
 
+def _force_torch_inductor_compile_threads(thread_num: int):
+    import torch
+
+    # see https://github.com/vllm-project/vllm/issues/10619
+    # A hacky way to limit the number of threads
+    torch._inductor.config.compile_threads = thread_num
+
+
 def load_general_plugins():
     """WARNING: plugins can be loaded for multiple times in different
     processes. They should be designed in a way that they can be loaded
@@ -26,7 +34,7 @@ def load_general_plugins():
 
     # see https://github.com/vllm-project/vllm/issues/10480
     os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
-
+    _force_torch_inductor_compile_threads(thread_num=1)
     global plugins_loaded
     if plugins_loaded:
         return