
Commit

Fix test coverage
dbarrous committed Aug 11, 2023
1 parent 213b4bc commit 4a8ea24
Showing 8 changed files with 1,382 additions and 768 deletions.
1,871 changes: 1,178 additions & 693 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions pyproject.toml
@@ -12,8 +12,14 @@ packages = [

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
slack_sdk = "3.19.5"
numpy = "^1.21.2"
boto3 = "1.28.4"
astropy = "5.1.1"
hermes_core = { git = "https://github.com/HERMES-SOC/hermes_core.git", branch = "main" }

[tool.poetry.group.dev.dependencies]
numpy = "^1.21.2"
pytest = "^7.2.0"
pytest-cov = "^4.0.0"
mypy = "^0.981"
@@ -26,6 +32,7 @@ psycopg = "3.1.8"
flake8 = "6.0.0"
black = "23.1.0"
slack_sdk = "3.19.5"
boto3 = "1.28.4"
hermes_core = { git = "https://github.com/HERMES-SOC/hermes_core.git", branch = "main" }


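These pins back the expanded test suite: pytest and pytest-cov in the dev group drive the coverage run, while slack_sdk, boto3, astropy, and hermes_core are now also pinned in the runtime dependency group (they remain in the dev group as well). A typical local check would be poetry install --with dev followed by poetry run pytest --cov; the exact invocation is illustrative, not part of this commit.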
19 changes: 4 additions & 15 deletions sdc_aws_utils/aws.py
@@ -1,5 +1,4 @@
import time
import hashlib
from datetime import datetime
from pathlib import Path
from typing import Callable
@@ -85,7 +84,7 @@ def create_s3_file_key(science_file_parser: Callable, old_file_key: str) -> str:
raise e


def list_files_in_bucket(s3_client, bucket_name: str):
def list_files_in_bucket(s3_client, bucket_name: str) -> list:
files = []
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket_name):
@@ -94,7 +93,7 @@ def list_files_in_bucket(s3_client, bucket_name: str):
return files


def check_file_existence_in_target_buckets(s3_client, file_key, source_bucket: str, target_buckets: list):
def check_file_existence_in_target_buckets(s3_client, file_key: str, source_bucket: str, target_buckets: list) -> bool:
for target_bucket in target_buckets:
if object_exists(s3_client, target_bucket, file_key):
print(f"File {file_key} from {source_bucket} exists in {target_bucket}")
@@ -104,7 +103,7 @@ def check_file_existence_in_target_buckets(s3_client, file_key, source_bucket: s
return False


def object_exists(s3_client, bucket: str, file_key: str, etag: bytes = None) -> bool:
def object_exists(s3_client, bucket: str, file_key: str) -> bool:
"""
Check if a file exists in the specified bucket.
@@ -115,18 +114,8 @@ def object_exists(s3_client, bucket: str, file_key: str, etag: bytes = None) ->
:return: True if the file exists, False otherwise.
"""
try:
response = s3_client.head_object(Bucket=bucket, Key=file_key)
log.info(response)
# If file_content is not provided, just check for existence
if not etag:
return True

# Calculate the MD5 hash of the provided content
# md5 = hashlib.md5(file_content).hexdigest()

# Compare with the ETag from the response (removing any surrounding quotes from ETag)
s3_client.head_object(Bucket=bucket, Key=file_key)
return True

except botocore.exceptions.ClientError:
return False

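For context, here is a minimal sketch of how the simplified helpers compose under moto; the bucket and key names are illustrative, mirroring the tests added below:

import boto3
from moto import mock_s3

from sdc_aws_utils.aws import (
    check_file_existence_in_target_buckets,
    list_files_in_bucket,
    object_exists,
)


@mock_s3
def sketch():
    s3 = boto3.client("s3")
    for bucket in ("incoming", "archive"):  # illustrative bucket names
        s3.create_bucket(Bucket=bucket)
    s3.put_object(Bucket="incoming", Key="hermes.bin", Body=b"data")

    # object_exists now only issues head_object; the unused ETag path is gone
    assert object_exists(s3, "incoming", "hermes.bin")
    assert "hermes.bin" in list_files_in_bucket(s3, "incoming")
    # Nothing has been copied yet, so the key is absent from the target bucket
    assert not check_file_existence_in_target_buckets(
        s3, "hermes.bin", "incoming", ["archive"]
    )


sketch()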
7 changes: 2 additions & 5 deletions sdc_aws_utils/config.py
@@ -5,11 +5,8 @@


def read_config_file():
try:
config_file_path = os.getenv("SDC_AWS_CONFIG_FILE_PATH", "./config.yaml")
except Exception as e:
log.error({"status": "ERROR", "message": e})
raise e
# Get the config file path from environment variable
config_file_path = os.getenv("SDC_AWS_CONFIG_FILE_PATH", "./config.yaml")
try:
with open(config_file_path) as f:
config = yaml.safe_load(f)
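For illustration, read_config_file resolves its path from SDC_AWS_CONFIG_FILE_PATH and falls back to ./config.yaml, so a test can point it at a temporary file. A minimal sketch, assuming the function returns the parsed mapping; the path and YAML key below are hypothetical:

import os

from sdc_aws_utils.config import read_config_file

# Hypothetical path and key; the real schema lives in the repository's config.yaml
os.environ["SDC_AWS_CONFIG_FILE_PATH"] = "/tmp/test_config.yaml"
with open("/tmp/test_config.yaml", "w") as f:
    f.write("MISSION_NAME: hermes\n")

config = read_config_file()
assert config["MISSION_NAME"] == "hermes"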
20 changes: 8 additions & 12 deletions sdc_aws_utils/logging.py
@@ -7,16 +7,12 @@
# Format for log file entries
log_file_format = "%(asctime)s, %(origin)s, %(levelname)s, %(message)s"

# Set log level
environment = os.getenv("LAMBDA_ENVIRONMENT", "DEVELOPMENT")
log.setLevel(logging.DEBUG)
if environment == "PRODUCTION":
log.setLevel(logging.INFO)
log_file = "/tmp/sdc_aws_processing_lambda.log"
fh = logging.FileHandler(log_file)
fh.setLevel(logging.INFO)
formatter = logging.Formatter(log_file_format)

# To remove boto3 noisy debug logging
logging.getLogger("botocore").setLevel(logging.CRITICAL)
logging.getLogger("boto3").setLevel(logging.CRITICAL)
def configure_logger():
# Set log level
environment = os.getenv("LAMBDA_ENVIRONMENT", "DEVELOPMENT")
log.setLevel(logging.DEBUG)
if environment == "PRODUCTION":
log.setLevel(logging.INFO)
logging.getLogger("botocore").setLevel(logging.CRITICAL)
logging.getLogger("boto3").setLevel(logging.CRITICAL)
124 changes: 95 additions & 29 deletions tests/test_aws.py
@@ -10,6 +10,9 @@
create_s3_client_session,
create_s3_file_key,
create_timestream_client_session,
copy_file_in_s3,
list_files_in_bucket,
check_file_existence_in_target_buckets,
download_file_from_s3,
log_to_timestream,
object_exists,
@@ -20,8 +23,10 @@
# from lambda_function.file_processor.config import parser


TEST_BUCKET = "test-bucket"

SOURCE_BUCKET = "test-bucket"
DEST_BUCKET = "dest-bucket"
FILE_KEY = "test_file.txt"
NEW_FILE_KEY = "new_test_file.txt"
BAD_BUCKET = "bad-bucket"

parser = parse_science_filename
@@ -98,11 +103,11 @@ def test_create_s3_file_key():
assert valid_key == "l0/2022/12/hermes_EEA_l0_2022335-200137_v01.bin"

# Test CDF file
test_valid_file_key = "hermes_eea_ql_20230205_000006_v1.0.01.cdf"
test_valid_file_key = "hermes_eea_ql_20230205T000006_v1.0.01.cdf"

valid_key = create_s3_file_key(parser, old_file_key=test_valid_file_key)

assert valid_key == "ql/2023/02/hermes_eea_ql_20230205_000006_v1.0.01.cdf"
assert valid_key == "ql/2023/02/hermes_eea_ql_20230205T000006_v1.0.01.cdf"

def test_parser(filename):
return {"level": "l0"}
@@ -125,44 +130,44 @@ def test_parser(filename):
@mock_s3
def test_object_exists():
s3_client = boto3.client("s3")
s3_client.create_bucket(Bucket=TEST_BUCKET)
s3_client.put_object(Bucket=TEST_BUCKET, Key="test_key", Body="test_data")
s3_client.create_bucket(Bucket=SOURCE_BUCKET)
s3_client.put_object(Bucket=SOURCE_BUCKET, Key="test_key", Body="test_data")

assert object_exists(s3_client, TEST_BUCKET, "test_key")
assert not object_exists(s3_client, TEST_BUCKET, "non_existent_key")
assert object_exists(s3_client, SOURCE_BUCKET, "test_key")
assert not object_exists(s3_client, SOURCE_BUCKET, "non_existent_key")


@mock_s3
def test_download_file_from_s3():
s3_client = boto3.client("s3")
s3_client.create_bucket(Bucket=TEST_BUCKET)
s3_client.put_object(Bucket=TEST_BUCKET, Key="test_key", Body="test_data")
s3_client.create_bucket(Bucket=SOURCE_BUCKET)
s3_client.put_object(Bucket=SOURCE_BUCKET, Key="test_key", Body="test_data")

local_path = download_file_from_s3(s3_client, TEST_BUCKET, "test_key", "downloaded_key")
local_path = download_file_from_s3(s3_client, SOURCE_BUCKET, "test_key", "downloaded_key")
assert local_path == Path("/tmp/downloaded_key")
assert local_path.is_file()

# Try to download a non-existent file
try:
download_file_from_s3(s3_client, TEST_BUCKET, "non_existent_key", "downloaded_key")
download_file_from_s3(s3_client, SOURCE_BUCKET, "non_existent_key", "downloaded_key")
except botocore.exceptions.ClientError as e:
assert e is not None


@mock_s3
def test_upload_file_to_s3():
s3_client = boto3.client("s3")
s3_client.create_bucket(Bucket=TEST_BUCKET)
s3_client.create_bucket(Bucket=SOURCE_BUCKET)

with open("/tmp/test_upload.txt", "w") as f:
f.write("test_data")

local_path = upload_file_to_s3(s3_client, "test_upload.txt", TEST_BUCKET, "uploaded_key")
local_path = upload_file_to_s3(s3_client, "test_upload.txt", SOURCE_BUCKET, "uploaded_key")
assert local_path == Path("/tmp/test_upload.txt")
assert s3_client.get_object(Bucket=TEST_BUCKET, Key="uploaded_key")["Body"].read().decode("utf-8") == "test_data"
assert s3_client.get_object(Bucket=SOURCE_BUCKET, Key="uploaded_key")["Body"].read().decode("utf-8") == "test_data"
# Try to upload a non-existent file
try:
upload_file_to_s3(s3_client, "bad_test_upload.txt", TEST_BUCKET, "uploaded_key")
upload_file_to_s3(s3_client, "bad_test_upload.txt", SOURCE_BUCKET, "uploaded_key")
except FileNotFoundError as e:
assert e is not None

@@ -173,44 +178,105 @@ def test_upload_file_to_s3():
assert e is not None


@mock_s3
def test_copy_file_in_s3():
s3_client = boto3.client("s3")
s3_client.create_bucket(Bucket=SOURCE_BUCKET)
s3_client.create_bucket(Bucket=DEST_BUCKET)

s3_client.put_object(Bucket=SOURCE_BUCKET, Key=FILE_KEY, Body="test data")

copy_file_in_s3(s3_client, SOURCE_BUCKET, DEST_BUCKET, FILE_KEY, NEW_FILE_KEY)

# Check if the file exists in the new location
assert s3_client.get_object(Bucket=DEST_BUCKET, Key=NEW_FILE_KEY)["Body"].read().decode() == "test data"

try:
copy_file_in_s3(s3_client, SOURCE_BUCKET, DEST_BUCKET, "non_existent_key", NEW_FILE_KEY)
assert False
except botocore.exceptions.ClientError as e:
assert e is not None


@mock_s3
def test_list_files_in_bucket():
s3_client = boto3.client("s3")
s3_client.create_bucket(Bucket=SOURCE_BUCKET)

s3_client.put_object(Bucket=SOURCE_BUCKET, Key=FILE_KEY, Body="test data")

files = list_files_in_bucket(s3_client, SOURCE_BUCKET)

assert FILE_KEY in files


@mock_s3
def test_check_file_existence_in_target_buckets():
s3_client = boto3.client("s3")
s3_client.create_bucket(Bucket=SOURCE_BUCKET)
s3_client.create_bucket(Bucket=DEST_BUCKET)

s3_client.put_object(Bucket=DEST_BUCKET, Key=FILE_KEY, Body="test data")

exists = check_file_existence_in_target_buckets(s3_client, FILE_KEY, SOURCE_BUCKET, [DEST_BUCKET])

assert exists is True

# Cleanup
s3_client.delete_object(Bucket=DEST_BUCKET, Key=FILE_KEY)
exists_after_delete = check_file_existence_in_target_buckets(s3_client, FILE_KEY, SOURCE_BUCKET, [DEST_BUCKET])

assert exists_after_delete is False


@mock_timestreamwrite
def test_log_to_timestream():
timestream_client = boto3.client("timestream-write", region_name="us-east-1")

db_name = "sdc_aws_logs"
table_name = "sdc_aws_s3_bucket_log_table"

# Set up the database and table
try:
timestream_client.create_database(DatabaseName="sdc_aws_logs")
timestream_client.create_database(DatabaseName=db_name)
except timestream_client.exceptions.ConflictException:
pass

try:
timestream_client.create_table(DatabaseName="sdc_aws_logs", TableName="sdc_aws_s3_bucket_log_table")
timestream_client.create_table(DatabaseName=db_name, TableName=table_name)
except timestream_client.exceptions.ConflictException:
pass

# No need to add assertions since we're only testing if the function can be called without exceptions
log_to_timestream(
timestream_client,
db_name,
table_name,
"COPY",
"test_file.txt",
"L1/2022/09/test_file.txt",
TEST_BUCKET,
TEST_BUCKET,
SOURCE_BUCKET,
SOURCE_BUCKET,
"PRODUCTION",
)

# Test without passing a timestream client
try:
log_to_timestream()
except Exception as e:
assert e is not None

# Test without passing a timestream client
# Test passing minimum required arguments
try:
log_to_timestream(
None,
timestream_client,
db_name,
table_name,
"COPY",
"test_file.txt",
"L1/2022/09/test_file.txt",
)
except Exception as e:
assert False
except ValueError as e:
assert e is not None

# Test without passing any arguments
try:
log_to_timestream()
assert False
except TypeError as e:
assert e is not None
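
Together, the new tests exercise copy_file_in_s3, list_files_in_bucket, check_file_existence_in_target_buckets, and the error paths of log_to_timestream, which is what closes the coverage gap named in the commit message. Running tests/test_aws.py under pytest-cov (pinned in pyproject.toml above) reproduces the coverage report locally.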
(Diffs for the two remaining changed files did not load.)
