Skip to content

Commit

Permalink
use redis completion signal to double check exit code (#3435)
Browse files: browse the repository at this point in the history
Co-authored-by: Richard Kuo (Danswer) <[email protected]>
  • Loading branch information
rkuo-danswer and Richard Kuo (Danswer) authored Dec 12, 2024
1 parent dee1a0e commit 4ae3b48
Showing 1 changed file with 32 additions and 9 deletions.
41 changes: 32 additions & 9 deletions backend/danswer/background/celery/tasks/indexing/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,18 +640,41 @@ def connector_indexing_proxy_task(
continue

if job.status == "error":
ignore_exitcode = False

exit_code: int | None = None
if job.process:
exit_code = job.process.exitcode
task_logger.error(
"Indexing watchdog - spawned task exceptioned: "
f"attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"exit_code={exit_code} "
f"error={job.exception()}"
)

# We are seeing non-deterministic behavior where spawned tasks occasionally return exit code 1
# even though logging clearly indicates that they completed successfully.
# To work around this, we ignore the job error state if the completion signal is OK.
status_int = redis_connector_index.get_completion()
if status_int:
status_enum = HTTPStatus(status_int)
if status_enum == HTTPStatus.OK:
ignore_exitcode = True

if ignore_exitcode:
task_logger.warning(
"Indexing watchdog - spawned task has non-zero exit code "
"but completion signal is OK. Continuing...: "
f"attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"exit_code={exit_code}"
)
else:
task_logger.error(
"Indexing watchdog - spawned task exceptioned: "
f"attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"exit_code={exit_code} "
f"error={job.exception()}"
)

job.release()
break
Expand Down

0 comments on commit 4ae3b48

Please sign in to comment.