Skip to content

Commit

Permalink
Use a separate thread to read stderr when needed
Browse files Browse the repository at this point in the history
  • Loading branch information
almet committed Dec 19, 2024
1 parent d66af44 commit 4f4c523
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 3 deletions.
3 changes: 2 additions & 1 deletion dangerzone/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def print_header(s: str) -> None:
"--debug",
"debug",
flag_value=True,
help="Run Dangerzone in debug mode, to get logs from gVisor.")
help="Run Dangerzone in debug mode, to get logs from gVisor.",
)
@click.version_option(version=get_version(), message="%(version)s")
@errors.handle_document_errors
def cli_main(
Expand Down
23 changes: 23 additions & 0 deletions dangerzone/isolation_provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import signal
import subprocess
import sys
import threading
from abc import ABC, abstractmethod
from queue import Queue
from typing import IO, Callable, Iterator, Optional

import fitz
Expand Down Expand Up @@ -92,6 +94,7 @@ def __init__(self, debug: bool = False) -> None:
self.proc_stderr = subprocess.PIPE
else:
self.proc_stderr = subprocess.DEVNULL
self.stderr_queue: Queue = Queue()

def should_capture_stderr(self) -> bool:
return self.debug or getattr(sys, "dangerzone_dev", False)
Expand Down Expand Up @@ -363,3 +366,23 @@ def doc_to_pixels_proc(
f"{debug_log}" # no need for an extra newline here
f"{DOC_TO_PIXELS_LOG_END}"
)

def _stream_stderr(self, stderr: IO[bytes], queue: Queue) -> None:
"""Read stderr in a separate thread to avoid blocking"""
try:
for line in stderr:
queue.put(line)
if self.debug:
log.debug(line.decode().strip())
except (ValueError, IOError) as e:
log.debug(f"Stderr stream closed: {e}")

def start_stderr_thread(self, process: subprocess.Popen) -> None:
"""Start a thread to read stderr from the process"""
if process.stderr:
stderr_thread = threading.Thread(
target=self._stream_stderr,
args=(process.stderr, self.stderr_queue),
daemon=True,
)
stderr_thread.start()
11 changes: 9 additions & 2 deletions dangerzone/isolation_provider/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def exec(
args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=self.proc_stderr,
stderr=subprocess.PIPE,
startupinfo=startupinfo,
# Start the conversion process in a new session, so that we can later on
# kill the process group, without killing the controlling script.
Expand Down Expand Up @@ -189,7 +189,14 @@ def exec_container(
+ command
)
args = [container_runtime] + args
return self.exec(args)
args_str = " ".join(shlex.quote(s) for s in args)
log.info("> " + args_str)

process = self.exec(args)
# Start stderr reader thread, attaching it to the process
self.start_stderr_thread(process)

return process

def kill_container(self, name: str) -> None:
"""Terminate a spawned container.
Expand Down
14 changes: 14 additions & 0 deletions dangerzone/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,25 @@ def convert_documents(
) -> None:
def convert_doc(document: Document) -> None:
try:
# Clear any existing stderr output
while not self.isolation_provider.stderr_queue.empty():
self.isolation_provider.stderr_queue.get_nowait()

self.isolation_provider.convert(
document,
ocr_lang,
stdout_callback,
)

# Process any stderr output that was captured
while not self.isolation_provider.stderr_queue.empty():
try:
line = self.isolation_provider.stderr_queue.get_nowait()
if stdout_callback:
stdout_callback(True, line.decode().strip(), -1)
except Exception as e:
log.error(f"Error processing stderr: {e}")

except Exception:
log.exception(
f"Unexpected error occurred while converting '{document}'"
Expand Down

0 comments on commit 4f4c523

Please sign in to comment.