Skip to content

Commit

Permalink
⚡ update execution timeout logic more aggresively (#35) (#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
dexhunter authored Jan 9, 2025
1 parent 8a75fe3 commit c92f194
Showing 1 changed file with 39 additions and 22 deletions.
61 changes: 39 additions & 22 deletions aide/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,15 @@ def exception_summary(e, working_dir, exec_file_name, format_tb_ipython):


class RedirectQueue:
def __init__(self, queue):
def __init__(self, queue, timeout=5):
self.queue = queue
self.timeout = timeout

def write(self, msg):
self.queue.put(msg)
try:
self.queue.put(msg, timeout=self.timeout)
except queue.Full:
logger.warning("Queue write timed out")

def flush(self):
pass
Expand Down Expand Up @@ -178,17 +182,25 @@ def create_process(self) -> None:
def cleanup_session(self):
if self.process is None:
return
# give the child process a chance to terminate gracefully
self.process.terminate()
self.process.join(timeout=2)
# kill the child process if it's still alive
if self.process.exitcode is None:
logger.warning("Child process failed to terminate gracefully, killing it..")
self.process.kill()
self.process.join()
# don't wait for gc, clean up immediately
self.process.close()
self.process = None # type: ignore
try:
# Reduce grace period from 2 seconds to 0.5
self.process.terminate()
self.process.join(timeout=0.5)

if self.process.exitcode is None:
logger.warning("Process failed to terminate, killing immediately")
self.process.kill()
self.process.join(timeout=0.5)

if self.process.exitcode is None:
logger.error("Process refuses to die, using SIGKILL")
os.kill(self.process.pid, signal.SIGKILL)
except Exception as e:
logger.error(f"Error during process cleanup: {e}")
finally:
if self.process is not None:
self.process.close()
self.process = None

def run(self, code: str, reset_session=True) -> ExecutionResult:
"""
Expand Down Expand Up @@ -257,15 +269,12 @@ def run(self, code: str, reset_session=True) -> ExecutionResult:
continue
running_time = time.time() - start_time
if running_time > self.timeout:

# [TODO] handle this in a better way
assert reset_session, "Timeout ocurred in interactive session"

# send interrupt to child
os.kill(self.process.pid, signal.SIGINT) # type: ignore
logger.warning(f"Execution exceeded timeout of {self.timeout}s")
os.kill(self.process.pid, signal.SIGINT)
child_in_overtime = True
# terminate if we're overtime by more than a minute
if running_time > self.timeout + 60:

# terminate if we're overtime by more than 5 seconds
if running_time > self.timeout + 5:
logger.warning("Child failed to terminate, killing it..")
self.cleanup_session()

Expand All @@ -277,8 +286,16 @@ def run(self, code: str, reset_session=True) -> ExecutionResult:
# read all stdout/stderr from child up to the EOF marker
# waiting until the queue is empty is not enough since
# the feeder thread in child might still be adding to the queue
start_collect = time.time()
while not self.result_outq.empty() or not output or output[-1] != "<|EOF|>":
output.append(self.result_outq.get())
try:
# Add 5-second timeout for output collection
if time.time() - start_collect > 5:
logger.warning("Output collection timed out")
break
output.append(self.result_outq.get(timeout=1))
except queue.Empty:
continue
output.pop() # remove the EOF marker

e_cls_name, exc_info, exc_stack = state[1:]
Expand Down

0 comments on commit c92f194

Please sign in to comment.