From 8007b1d14f5173a39376f9ea82ed78541778240e Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Fri, 4 Oct 2024 13:21:49 -0700 Subject: [PATCH] log formatting fixes for raw-sync lambda --- src/lambda_function/raw_sync/app.py | 18 +++++++------ tests/test_lambda_raw_sync.py | 42 ++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/src/lambda_function/raw_sync/app.py b/src/lambda_function/raw_sync/app.py index 4589a4d..2e2ac1c 100644 --- a/src/lambda_function/raw_sync/app.py +++ b/src/lambda_function/raw_sync/app.py @@ -375,7 +375,7 @@ def list_files_in_archive( if adjusted_range_size > total_size * 2: logger.error( "Did not find an end of central directory record in " - f"s3://{bucket}/{key}" + f"s3://{os.path.join(bucket, key)}" ) return [] logger.warning( @@ -428,7 +428,7 @@ def list_files_in_archive( file_list.append(file_object) if len(file_list) == 0: logger.warning( - f"Did not find any files in s3://{bucket}/{key} which " + f"Did not find any files in s3://{os.path.join(bucket, key)} which " "satisfy the conditions needed to be processed by the " "raw Lambda." ) @@ -501,9 +501,11 @@ def get_expected_raw_key( str: The expected S3 key of the corresponding raw object. """ file_identifier = os.path.basename(path).split(".")[0] - expected_key = ( - f"{raw_key_prefix}/dataset={data_type}" - f"/cohort={cohort}/{file_identifier}.ndjson.gz" + expected_key = os.path.join( + raw_key_prefix, + f"dataset={data_type}", + f"cohort={cohort}", + f"{file_identifier}.ndjson.gz", ) return expected_key @@ -541,7 +543,7 @@ def main( filename = file_object["filename"] logger.info( f"Checking corresponding raw object for {filename} " - f"from s3://{input_bucket}/{export_key}" + f"from s3://{os.path.join(input_bucket, export_key)}" ) data_type = get_data_type_from_path(path=filename) expected_raw_key = get_expected_raw_key( @@ -559,8 +561,8 @@ def main( if corresponding_raw_object is None: logger.info( f"Did not find corresponding raw object for {filename} from " - f"s3://{input_bucket}/{export_key} at " - f"s3://{raw_bucket}/{expected_raw_key}" + f"s3://{os.path.join(input_bucket, export_key)} at " + f"s3://{os.path.join(raw_bucket, expected_raw_key)}" ) publish_to_sns( bucket=input_bucket, diff --git a/tests/test_lambda_raw_sync.py b/tests/test_lambda_raw_sync.py index 4c30d5b..02efbdc 100644 --- a/tests/test_lambda_raw_sync.py +++ b/tests/test_lambda_raw_sync.py @@ -1,5 +1,6 @@ import io import json +import os import struct import zipfile from collections import defaultdict @@ -635,15 +636,17 @@ def test_get_data_type_from_path_deleted(): assert data_type == "HealthKitV2Samples_Deleted" -import os - - def test_get_expected_raw_key_case1(): raw_key_prefix = "test-raw_key_prefix/json" data_type = "test-data-type" cohort = "test-cohort" path = "path/to/FitbitIntradayCombined_20241111-20241112.json" - expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/FitbitIntradayCombined_20241111-20241112.ndjson.gz" + expected_key = os.path.join( + raw_key_prefix, + f"dataset={data_type}", + f"cohort={cohort}", + "FitbitIntradayCombined_20241111-20241112.ndjson.gz", + ) assert ( app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path) == expected_key @@ -655,7 +658,12 @@ def test_get_expected_raw_key_case2(): data_type = "test-data-type" cohort = "test-cohort" path = "path/to/HealthKitV2Samples_AppleStandTime_20241111-20241112.json" - expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz" + expected_key = os.path.join( + raw_key_prefix, + f"dataset={data_type}", + f"cohort={cohort}", + "HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz", + ) assert ( app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path) == expected_key @@ -667,7 +675,29 @@ def test_get_expected_raw_key_case3(): data_type = "test-data-type" cohort = "test-cohort" path = "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json" - expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz" + expected_key = os.path.join( + raw_key_prefix, + f"dataset={data_type}", + f"cohort={cohort}", + "HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz", + ) + assert ( + app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path) + == expected_key + ) + + +def test_get_expected_raw_key_trailing_slash(): + raw_key_prefix = "test-raw_key_prefix/json/" + data_type = "test-data-type" + cohort = "test-cohort" + path = "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json" + expected_key = os.path.join( + raw_key_prefix, + f"dataset={data_type}", + f"cohort={cohort}", + "HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz", + ) assert ( app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path) == expected_key