From bebde5409a1be6477881e2cd4e26fe569a5a4313 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Wed, 26 Jun 2024 10:19:55 -0400 Subject: [PATCH] Apply suggestions from code review Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- .../executor/query/fs_search_task.py | 2 +- .../scheduler/query/query_scheduler.py | 17 ++++++++--------- .../package-template/src/etc/clp-config.yml | 5 ++--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/components/job-orchestration/job_orchestration/executor/query/fs_search_task.py b/components/job-orchestration/job_orchestration/executor/query/fs_search_task.py index 844af24e7..169aeb774 100644 --- a/components/job-orchestration/job_orchestration/executor/query/fs_search_task.py +++ b/components/job-orchestration/job_orchestration/executor/query/fs_search_task.py @@ -114,7 +114,7 @@ def search( clo_log_path = get_logger_file_path(clp_logs_dir, job_id, task_id) clo_log_file = open(clo_log_path, "w") - logger.info(f"Started task for job {job_id}") + logger.info(f"Started search task for job {job_id}") search_config = SearchJobConfig.parse_obj(job_config_obj) sql_adapter = SQL_Adapter(Database.parse_obj(clp_metadata_db_conn_params)) diff --git a/components/job-orchestration/job_orchestration/scheduler/query/query_scheduler.py b/components/job-orchestration/job_orchestration/scheduler/query/query_scheduler.py index 7787cd414..d51642a7b 100644 --- a/components/job-orchestration/job_orchestration/scheduler/query/query_scheduler.py +++ b/components/job-orchestration/job_orchestration/scheduler/query/query_scheduler.py @@ -283,16 +283,15 @@ def get_archive_and_update_config_for_extraction( db_conn, extract_ir_config: ExtractIrJobConfig, ) -> Optional[str]: - orig_file_id = extract_ir_config.orig_file_id msg_ix = extract_ir_config.msg_ix results = get_archive_and_file_split_for_extraction(db_conn, orig_file_id, msg_ix) if len(results) == 0: - logger.error(f"No file split and archive match with config: {orig_file_id}:{msg_ix}") + logger.error(f"No matching file splits for orig_file_id={orig_file_id}, msg_ix={msg_ix}") return None elif len(results) > 1: - logger.error(f"Multiple splits match with config: {orig_file_id}:{msg_ix}") + logger.error(f"Multiple file splits found for orig_file_id={orig_file_id}, msg_ix={msg_ix}") for result in results: logger.error(f"{result['archive_id']}:{result['id']}") return None @@ -532,7 +531,7 @@ def handle_pending_query_jobs( num_tasks=0, duration=0, ): - logger.error(f"Failed to set job: {job_id} as failed") + logger.error(f"Failed to set job {job_id} as failed") continue new_extract_ir_job = ExtractIrJob( @@ -712,7 +711,7 @@ async def handle_finished_search_job( async def handle_finished_extract_ir_job( - db_conn, job: SearchJob, task_results: Optional[Any] + db_conn, job: ExtractIrJob, task_results: Optional[Any] ) -> None: global active_jobs @@ -721,8 +720,8 @@ async def handle_finished_extract_ir_job( num_task = len(task_results) if 1 != num_task: logger.error( - f"Unexpected number of task under IR extraction job: {job_id}. " - f"expected 1, got {num_task}" + f"Unexpected number of tasks for IR extraction job {job_id}. " + f"Expected 1, got {num_tasks}." ) new_job_status = QueryJobStatus.FAILED else: @@ -750,9 +749,9 @@ async def handle_finished_extract_ir_job( duration=(datetime.datetime.now() - job.start_time).total_seconds(), ): if new_job_status == QueryJobStatus.SUCCEEDED: - logger.info(f"Completed job {job_id}.") + logger.info(f"Completed IR extraction job {job_id}.") else: - logger.info(f"Completed job {job_id} with failing tasks.") + logger.info(f"Completed IR extraction job {job_id} with failing tasks.") del active_jobs[job_id] diff --git a/components/package-template/src/etc/clp-config.yml b/components/package-template/src/etc/clp-config.yml index 15fbcc2d3..98759a041 100644 --- a/components/package-template/src/etc/clp-config.yml +++ b/components/package-template/src/etc/clp-config.yml @@ -78,12 +78,11 @@ # # How much data CLP should try to fit into each segment within an archive # target_segment_size: 268435456 # 256 MB # -## Where IR should be output to +## Where CLP IR files should be output #ir_output: # directory: "var/data/ir" # -# # How large each IR chunk should be before being -# # split into a new IR chunk +# # How large each IR file should be before being split into a new IR file # target_uncompressed_size: 134217728 # 128 MB # ## Location where other data (besides archives) are stored. It will be created if