From 3507cd1dc533c374d8d2c81048601c20e7a0babb Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Mon, 25 Nov 2024 06:26:36 +0000
Subject: [PATCH] Done

Signed-off-by: Jee Jee Li
---
 vllm/plugins/__init__.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/vllm/plugins/__init__.py b/vllm/plugins/__init__.py
index d5056b18fe968..6836da698b831 100644
--- a/vllm/plugins/__init__.py
+++ b/vllm/plugins/__init__.py
@@ -14,6 +14,14 @@
 plugins_loaded = False
 
 
+def _force_torch_inductor_compile_threads(thread_num: int):
+    import torch
+
+    # see https://github.com/vllm-project/vllm/issues/10619
+    # A hacky way to limit the number of threads
+    torch._inductor.config.compile_threads = thread_num
+
+
 def load_general_plugins():
     """WARNING: plugins can be loaded for multiple times in different
     processes. They should be designed in a way that they can be loaded
@@ -26,7 +34,7 @@ def load_general_plugins():
 
     # see https://github.com/vllm-project/vllm/issues/10480
     os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
-
+    _force_torch_inductor_compile_threads(thread_num=1)
     global plugins_loaded
     if plugins_loaded:
         return