From bebde5409a1be6477881e2cd4e26fe569a5a4313 Mon Sep 17 00:00:00 2001
From: haiqi96 <14502009+haiqi96@users.noreply.github.com>
Date: Wed, 26 Jun 2024 10:19:55 -0400
Subject: [PATCH] Apply suggestions from code review

Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com>
---
 .../executor/query/fs_search_task.py            |  2 +-
 .../scheduler/query/query_scheduler.py          | 17 ++++++++---------
 .../package-template/src/etc/clp-config.yml     |  5 ++---
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/components/job-orchestration/job_orchestration/executor/query/fs_search_task.py b/components/job-orchestration/job_orchestration/executor/query/fs_search_task.py
index 844af24e7..169aeb774 100644
--- a/components/job-orchestration/job_orchestration/executor/query/fs_search_task.py
+++ b/components/job-orchestration/job_orchestration/executor/query/fs_search_task.py
@@ -114,7 +114,7 @@ def search(
     clo_log_path = get_logger_file_path(clp_logs_dir, job_id, task_id)
     clo_log_file = open(clo_log_path, "w")
 
-    logger.info(f"Started task for job {job_id}")
+    logger.info(f"Started search task for job {job_id}")
 
     search_config = SearchJobConfig.parse_obj(job_config_obj)
     sql_adapter = SQL_Adapter(Database.parse_obj(clp_metadata_db_conn_params))
diff --git a/components/job-orchestration/job_orchestration/scheduler/query/query_scheduler.py b/components/job-orchestration/job_orchestration/scheduler/query/query_scheduler.py
index 7787cd414..d51642a7b 100644
--- a/components/job-orchestration/job_orchestration/scheduler/query/query_scheduler.py
+++ b/components/job-orchestration/job_orchestration/scheduler/query/query_scheduler.py
@@ -283,16 +283,15 @@ def get_archive_and_update_config_for_extraction(
     db_conn,
     extract_ir_config: ExtractIrJobConfig,
 ) -> Optional[str]:
-
     orig_file_id = extract_ir_config.orig_file_id
     msg_ix = extract_ir_config.msg_ix
 
     results = get_archive_and_file_split_for_extraction(db_conn, orig_file_id, msg_ix)
     if len(results) == 0:
-        logger.error(f"No file split and archive match with config: {orig_file_id}:{msg_ix}")
+        logger.error(f"No matching file splits for orig_file_id={orig_file_id}, msg_ix={msg_ix}")
         return None
     elif len(results) > 1:
-        logger.error(f"Multiple splits match with config: {orig_file_id}:{msg_ix}")
+        logger.error(f"Multiple file splits found for orig_file_id={orig_file_id}, msg_ix={msg_ix}")
         for result in results:
             logger.error(f"{result['archive_id']}:{result['id']}")
         return None
@@ -532,7 +531,7 @@ def handle_pending_query_jobs(
                         num_tasks=0,
                         duration=0,
                     ):
-                        logger.error(f"Failed to set job: {job_id} as failed")
+                        logger.error(f"Failed to set job {job_id} as failed")
                     continue
 
                 new_extract_ir_job = ExtractIrJob(
@@ -712,7 +711,7 @@ async def handle_finished_search_job(
 
 
 async def handle_finished_extract_ir_job(
-    db_conn, job: SearchJob, task_results: Optional[Any]
+    db_conn, job: ExtractIrJob, task_results: Optional[Any]
 ) -> None:
     global active_jobs
 
@@ -721,8 +720,8 @@ async def handle_finished_extract_ir_job(
     num_task = len(task_results)
     if 1 != num_task:
         logger.error(
-            f"Unexpected number of task under IR extraction job: {job_id}. "
-            f"expected 1, got {num_task}"
+            f"Unexpected number of tasks for IR extraction job {job_id}. "
+            f"Expected 1, got {num_task}."
         )
         new_job_status = QueryJobStatus.FAILED
     else:
@@ -750,9 +749,9 @@ async def handle_finished_extract_ir_job(
         duration=(datetime.datetime.now() - job.start_time).total_seconds(),
     ):
         if new_job_status == QueryJobStatus.SUCCEEDED:
-            logger.info(f"Completed job {job_id}.")
+            logger.info(f"Completed IR extraction job {job_id}.")
         else:
-            logger.info(f"Completed job {job_id} with failing tasks.")
+            logger.info(f"Completed IR extraction job {job_id} with failing tasks.")
     del active_jobs[job_id]
 
 
diff --git a/components/package-template/src/etc/clp-config.yml b/components/package-template/src/etc/clp-config.yml
index 15fbcc2d3..98759a041 100644
--- a/components/package-template/src/etc/clp-config.yml
+++ b/components/package-template/src/etc/clp-config.yml
@@ -78,12 +78,11 @@
 #  # How much data CLP should try to fit into each segment within an archive
 #  target_segment_size: 268435456  # 256 MB
 #
-## Where IR should be output to
+## Where CLP IR files should be output
 #ir_output:
 #  directory: "var/data/ir"
 #
-#  # How large each IR chunk should be before being
-#  # split into a new IR chunk
+#  # How large each IR file should be before being split into a new IR file
 #  target_uncompressed_size: 134217728  # 128 MB
 #
 ## Location where other data (besides archives) are stored. It will be created if