Skip to content

Commit

Permalink
call destroy_distributed_environment atexit
Browse files Browse the repository at this point in the history
Signed-off-by: Tyler Michael Smith <[email protected]>
  • Loading branch information
tlrmchlsmth committed Dec 2, 2024
1 parent f8a1b9b commit 963c97f
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 12 deletions.
10 changes: 0 additions & 10 deletions vllm/distributed/parallel_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,16 +1169,6 @@ def destroy_model_parallel():
_PP = None


# HACK (V1): calling _TP.destroy() results in 2 leaked shared memory objects,
# which is related to the torch.distributed.destroy_process_group calls.
# Not cleaning up the group's mq_broadcaster, however, results in 1 leaked
# shm object, so only that reference is dropped here.
# TODO: Fix up this hack
def destroy_tp_mq_broadcaster():
    """Clear the ``mq_broadcaster`` reference held by the module-level ``_TP``.

    Does nothing when ``_TP`` is falsy.
    """
    global _TP
    if not _TP:
        return
    _TP.mq_broadcaster = None


def destroy_distributed_environment():
global _WORLD
if _WORLD:
Expand Down
5 changes: 3 additions & 2 deletions vllm/v1/worker/gpu_worker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""A GPU worker class."""
import atexit
import gc
import os
import pickle
Expand All @@ -13,7 +14,7 @@

import vllm.envs as envs
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
from vllm.distributed import (destroy_tp_mq_broadcaster,
from vllm.distributed import (destroy_distributed_environment,
ensure_model_parallel_initialized,
init_distributed_environment,
set_custom_all_reduce)
Expand Down Expand Up @@ -392,6 +393,7 @@ def make_worker_process(
@staticmethod
def run_worker(*args, **kwargs):
"""Launch Worker busy loop in background process."""
atexit.register(destroy_distributed_environment)

try:
worker = WorkerProc(*args, **kwargs)
Expand All @@ -403,7 +405,6 @@ def run_worker(*args, **kwargs):

# Clean up once worker exits busy loop
worker = None
destroy_tp_mq_broadcaster()

except KeyboardInterrupt:
logger.debug("Worker interrupted.")
Expand Down

0 comments on commit 963c97f

Please sign in to comment.