Skip to content

Commit

Permalink
log formatting fixes for raw-sync lambda
Browse files: browse the repository at this point in the history
  • Loading branch information
philerooski committed Oct 7, 2024
1 parent 5e7c861 commit 8007b1d
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 14 deletions.
18 changes: 10 additions & 8 deletions src/lambda_function/raw_sync/app.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -375,7 +375,7 @@ def list_files_in_archive(
if adjusted_range_size > total_size * 2:
logger.error(
"Did not find an end of central directory record in "
f"s3://{bucket}/{key}"
f"s3://{os.path.join(bucket, key)}"
)
return []
logger.warning(
Expand Down Expand Up @@ -428,7 +428,7 @@ def list_files_in_archive(
file_list.append(file_object)
if len(file_list) == 0:
logger.warning(
f"Did not find any files in s3://{bucket}/{key} which "
f"Did not find any files in s3://{os.path.join(bucket, key)} which "
"satisfy the conditions needed to be processed by the "
"raw Lambda."
)
Expand Down Expand Up @@ -501,9 +501,11 @@ def get_expected_raw_key(
str: The expected S3 key of the corresponding raw object.
"""
file_identifier = os.path.basename(path).split(".")[0]
expected_key = (
f"{raw_key_prefix}/dataset={data_type}"
f"/cohort={cohort}/{file_identifier}.ndjson.gz"
expected_key = os.path.join(
raw_key_prefix,
f"dataset={data_type}",
f"cohort={cohort}",
f"{file_identifier}.ndjson.gz",
)
return expected_key

Expand Down Expand Up @@ -541,7 +543,7 @@ def main(
filename = file_object["filename"]
logger.info(
f"Checking corresponding raw object for {filename} "
f"from s3://{input_bucket}/{export_key}"
f"from s3://{os.path.join(input_bucket, export_key)}"
)
data_type = get_data_type_from_path(path=filename)
expected_raw_key = get_expected_raw_key(
Expand All @@ -559,8 +561,8 @@ def main(
if corresponding_raw_object is None:
logger.info(
f"Did not find corresponding raw object for {filename} from "
f"s3://{input_bucket}/{export_key} at "
f"s3://{raw_bucket}/{expected_raw_key}"
f"s3://{os.path.join(input_bucket, export_key)} at "
f"s3://{os.path.join(raw_bucket, expected_raw_key)}"
)
publish_to_sns(
bucket=input_bucket,
Expand Down
42 changes: 36 additions & 6 deletions tests/test_lambda_raw_sync.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
import io
import json
import os
import struct
import zipfile
from collections import defaultdict
Expand Down Expand Up @@ -635,15 +636,17 @@ def test_get_data_type_from_path_deleted():
assert data_type == "HealthKitV2Samples_Deleted"


import os


def test_get_expected_raw_key_case1():
raw_key_prefix = "test-raw_key_prefix/json"
data_type = "test-data-type"
cohort = "test-cohort"
path = "path/to/FitbitIntradayCombined_20241111-20241112.json"
expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/FitbitIntradayCombined_20241111-20241112.ndjson.gz"
expected_key = os.path.join(
raw_key_prefix,
f"dataset={data_type}",
f"cohort={cohort}",
"FitbitIntradayCombined_20241111-20241112.ndjson.gz",
)
assert (
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
== expected_key
Expand All @@ -655,7 +658,12 @@ def test_get_expected_raw_key_case2():
data_type = "test-data-type"
cohort = "test-cohort"
path = "path/to/HealthKitV2Samples_AppleStandTime_20241111-20241112.json"
expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz"
expected_key = os.path.join(
raw_key_prefix,
f"dataset={data_type}",
f"cohort={cohort}",
"HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz",
)
assert (
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
== expected_key
Expand All @@ -667,7 +675,29 @@ def test_get_expected_raw_key_case3():
data_type = "test-data-type"
cohort = "test-cohort"
path = "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json"
expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz"
expected_key = os.path.join(
raw_key_prefix,
f"dataset={data_type}",
f"cohort={cohort}",
"HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz",
)
assert (
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
== expected_key
)


def test_get_expected_raw_key_trailing_slash():
raw_key_prefix = "test-raw_key_prefix/json/"
data_type = "test-data-type"
cohort = "test-cohort"
path = "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json"
expected_key = os.path.join(
raw_key_prefix,
f"dataset={data_type}",
f"cohort={cohort}",
"HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz",
)
assert (
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
== expected_key
Expand Down

0 comments on commit 8007b1d

Please sign in to comment.