Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Windows Threading Issues #385

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
pip list

- name: Tests
timeout-minutes: 10
timeout-minutes: 30
Copy link
Collaborator

@aniketmaurya aniketmaurya Dec 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's switch it back after figuring out the reason CI is stuck since we don't want to run tests for 30 mins.

Suggested change
timeout-minutes: 30
timeout-minutes: 10

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems CI is stuck since this commit 2bbed42

Maybe it's due to Python 3.11, for a reason I don't know.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, since it's only specific to Python 3.11 on Windows latest, it probably means that something is not working as expected.

run: |
python -m pytest --cov=litserve src/ tests/ -v -s

Expand Down
56 changes: 45 additions & 11 deletions src/litserve/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@
from collections import deque
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager
from multiprocessing.context import Process
from queue import Empty
from threading import Thread
from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union

import uvicorn
import uvicorn.server
from fastapi import Depends, FastAPI, HTTPException, Request, Response
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.security import APIKeyHeader
Expand Down Expand Up @@ -233,6 +236,7 @@ def __init__(
self.model_metadata = model_metadata
self._connector = _Connector(accelerator=accelerator, devices=devices)
self._callback_runner = CallbackRunner(callbacks)
self._uvicorn_servers = None

specs = spec if spec is not None else []
self._specs = specs if isinstance(specs, Sequence) else [specs]
Expand Down Expand Up @@ -559,19 +563,40 @@ def run(
elif api_server_worker_type is None:
api_server_worker_type = "process"

manager, litserve_workers = self.launch_inference_worker(num_api_servers)
manager, inference_workers = self.launch_inference_worker(num_api_servers)

self.verify_worker_status()
try:
servers = self._start_server(port, num_api_servers, log_level, sockets, api_server_worker_type, **kwargs)
uvicorn_workers = self._start_server(
port, num_api_servers, log_level, sockets, api_server_worker_type, **kwargs
)
print(f"Swagger UI is available at http://0.0.0.0:{port}/docs")
for s in servers:
s.join()
if sys.platform != "win32":
# On Linux, the kill signal will be captured by uvicorn.
# => The uvicorn workers will join and raise a KeyboardInterrupt, allowing the server to shut down.
for uw in uvicorn_workers:
uw: Union[Process, Thread]
uw.join()
else:
# On Windows, the kill signal is captured by the inference workers.
# => They will join and raise a KeyboardInterrupt, allowing the server to shut down.
for iw in inference_workers:
iw: Process
iw.join()
except KeyboardInterrupt:
# KeyboardInterrupt received: begin graceful shutdown.
if sys.platform == "win32":
# We kindly ask uvicorn servers to exit.
# It will properly end threads on windows.
for us in self._uvicorn_servers:
us: uvicorn.Server
us.should_exit = True
finally:
print("Shutting down LitServe")
for w in litserve_workers:
w.terminate()
w.join()
for iw in inference_workers:
iw: Process
iw.terminate()
iw.join()
manager.shutdown()

def _prepare_app_run(self, app: FastAPI):
Expand All @@ -581,16 +606,24 @@ def _prepare_app_run(self, app: FastAPI):
app.add_middleware(RequestCountMiddleware, active_counter=active_counter)

def _start_server(self, port, num_uvicorn_servers, log_level, sockets, uvicorn_worker_type, **kwargs):
servers = []
workers = []
self._uvicorn_servers = []
for response_queue_id in range(num_uvicorn_servers):
self.app.response_queue_id = response_queue_id
if self.lit_spec:
self.lit_spec.response_queue_id = response_queue_id
app: FastAPI = copy.copy(self.app)

self._prepare_app_run(app)

config = uvicorn.Config(app=app, host="0.0.0.0", port=port, log_level=log_level, **kwargs)
if sys.platform == "win32" and num_uvicorn_servers > 1:
logger.debug("Enable Windows explicit socket sharing...")
# We make sure sockets is listening...
# It prevents further [WinError 10022]
[sock.listen(config.backlog) for sock in sockets]
# We set config.workers to tell uvicorn to use a shared socket (win32)
# https://github.com/encode/uvicorn/pull/802
config.workers = num_uvicorn_servers
server = uvicorn.Server(config=config)
if uvicorn_worker_type == "process":
ctx = mp.get_context("fork")
Expand All @@ -600,8 +633,9 @@ def _start_server(self, port, num_uvicorn_servers, log_level, sockets, uvicorn_w
else:
raise ValueError("Invalid value for api_server_worker_type. Must be 'process' or 'thread'")
w.start()
servers.append(w)
return servers
workers.append(w)
self._uvicorn_servers.append(server)
return workers

def setup_auth(self):
if hasattr(self.lit_api, "authorize") and callable(self.lit_api.authorize):
Expand Down
Loading