Skip to content

Commit

Permalink
Merge pull request #70 from VMess1/storage_errors
Browse files Browse the repository at this point in the history
Combines tests for storage handler and checked PEP8 compliance
  • Loading branch information
VMess1 authored Nov 15, 2023
2 parents 7813136 + 3f5f5ec commit 477faef
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/processing/dim_table_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def dim_remove_dates(data):
transformed_data = data.drop(columns="created_at", inplace=False, axis=1)
transformed_data = transformed_data.drop(
columns='last_updated', inplace=False, axis=1)

return transformed_data


Expand Down Expand Up @@ -96,7 +96,7 @@ def join_address(counterparty_df, address_df):


def dim_locationtf(address_df):
'''takes address dataframe and renames column to
'''takes address dataframe and renames column to
transform into location dataframe'''
renamed_df = address_df.rename(
{'address_id': 'location_id'}, axis='columns')
Expand Down
6 changes: 3 additions & 3 deletions src/processing/processing_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ def main(event, context):
First, checks transformation bucket for directories.
If dim_date is not a directory, creates this table.
This should only be created once as it is just a list
of dates from 2020 to 2050.
of dates from 2020 to 2050.
Second, based on the table whose update has triggered
this function, transforms tables as necessary to
match the OLAP format.
Last, writes the table to the transformation bucket
in parquet format.
Handles errors that may arise during the transformation
Expand Down
2 changes: 1 addition & 1 deletion src/processing/read_write_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def check_transformation_bucket(client, target_bucket):

def compile_full_csv_table(client, target_bucket, table_name):
'''
Takes S3 client, bucket and table info of the full table
Takes S3 client, bucket and table info of the full table
needed. Compiles csv files and returns a dataframe with any
duplicates removed
'''
Expand Down
2 changes: 1 addition & 1 deletion tests/test_processing/test_dim_table_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_new_currency_table_includes_correct_currency_name(self):
def test_invalid_code_marked_as_invalid_in_currency_name(self):
'''
Creates input and expected dataframes with invalid currency codes.
Tests that the invalid code gets transformed to "Invalid"
Tests that the invalid code gets transformed to "Invalid"
'''
test_input = pd.DataFrame(data={
'currency_id': [1, 2, 3],
Expand Down
2 changes: 1 addition & 1 deletion tests/test_processing/test_read_write_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def test_data_includes_all_csv_files_in_directory(self, mock_csv_bucket):
def test_data_removes_duplicate_csv_files_in_directory(
self, mock_csv_bucket):
'''
test that compiling removes any duplicate rows of data
test that compiling removes any duplicate rows of data
'''
test_data_1 = (
'item_id;item_name;created_at;last_updated\n' +
Expand Down
120 changes: 120 additions & 0 deletions tests/test_storage/test_storage_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from unittest.mock import patch
from datetime import date, time
from decimal import Decimal
from botocore.exceptions import ClientError


from tests.test_storage.data.main_dataframes import (
Expand Down Expand Up @@ -117,6 +118,42 @@ def mock_missing_parquet_bucket(aws_credentials):
yield conn


@pytest.fixture(scope='function')
def mock_patches(mock_missing_parquet_bucket):
    '''
    Patches every storage_handler collaborator for the duration of a test
    and yields the mocks keyed by name (e.g. 'mock_get_table_list').
    The three lookup helpers get canned return values; get_s3_client
    returns the moto-backed client from mock_missing_parquet_bucket.
    '''
    target_names = [
        'get_table_list',
        'get_credentials',
        'get_con',
        'get_s3_client',
        'get_last_timestamp',
        'run_insert_query',
        'compile_parquet_data',
        'write_current_timestamp',
    ]
    # Start each patcher by hand so they can all be stopped in one place.
    patchers = {
        name: patch(f'src.storage.storage_handler.{name}')
        for name in target_names
    }
    mocks = {
        f'mock_{name}': patcher.start()
        for name, patcher in patchers.items()
    }
    try:
        mocks['mock_get_table_list'].return_value = ['table1', 'table2']
        mocks['mock_get_credentials'].return_value = 'mocked_credentials'
        mocks['mock_get_con'].return_value = 'mocked_connection'
        mocks['mock_get_s3_client'].return_value = mock_missing_parquet_bucket
        yield mocks
    finally:
        # Mirrors the with-block cleanup: undo every patch even if the
        # test body raises.
        for patcher in patchers.values():
            patcher.stop()


class TestBasicFunctionRuns:
@patch('src.storage.storage_handler.get_table_list')
@patch('src.storage.storage_handler.get_credentials')
Expand Down Expand Up @@ -220,3 +257,86 @@ def test_fact_sales_order_table_updates(
Decimal('4.50'), 6]]
result = seeded_connection.run("SELECT * FROM fact_test_sales_order")
assert result == test_expected


class TestErrorHandling:
    '''
    Checks that main() converts errors raised by its collaborators
    into the expected ERROR-level log messages.
    '''

    @staticmethod
    def _client_error(code, message):
        '''
        Builds a botocore ClientError with the given error code and
        message, targeting the transformation bucket. Consolidates the
        identical error_response boilerplate the tests below share.
        '''
        return ClientError(
            error_response={
                "Error": {
                    "Code": code,
                    "Message": message,
                    "BucketName": "nc-group3-transformation-bucket"
                }
            },
            operation_name="ClientError"
        )

    def test_transformation_bucket_not_found(self, caplog, mock_patches):
        """
        Tests that a ClientError of NoSuchBucket returns the
        correct logging error if no such bucket exists
        """
        mock_patches['mock_compile_parquet_data'].side_effect = (
            self._client_error(
                "NoSuchBucket",
                "The specified bucket does not exist."
            )
        )
        with caplog.at_level(logging.ERROR):
            main(None, None)
        expected = "Bucket not found: nc-group3-transformation-bucket"
        assert expected in caplog.text

    def test_handler_logs_internal_service_errors(
            self, caplog, mock_patches
    ):
        """
        Tests that a ClientError of InternalServiceError returns the
        correct logging error
        """
        mock_patches['mock_compile_parquet_data'].side_effect = (
            self._client_error(
                "InternalServiceError",
                "Internal service error detected!"
            )
        )
        with caplog.at_level(logging.ERROR):
            main(None, None)
        expected = "Internal service error detected."
        assert expected in caplog.text

    def test_handler_logs_no_such_key_errors(
            self, caplog, mock_patches
    ):
        """
        Tests that a ClientError of NoSuchKey returns the
        correct logging error
        """
        mock_patches['mock_compile_parquet_data'].side_effect = (
            self._client_error(
                "NoSuchKey",
                "Testing key errors"
            )
        )
        with caplog.at_level(logging.ERROR):
            main(None, None)
        expected = "No such key"
        assert expected in caplog.text

    def test_handler_logs_error_for_incorrect_parameter_type(
            self, caplog, mock_patches
    ):
        """
        Tests that a non-iterable table list (an int instead of a list)
        is reported as an incorrect parameter type
        """
        mock_patches['mock_get_table_list'].return_value = 123
        with caplog.at_level(logging.ERROR):
            main(None, None)
        expected = "Incorrect parameter type:"
        assert expected in caplog.text

    def test_file_logs_exception_error_message(
            self, caplog, mock_patches):
        """
        Tests that any unanticipated exception is caught and logged
        with the generic fallback message
        """
        mock_patches['mock_get_table_list'].side_effect = Exception
        with caplog.at_level(logging.ERROR):
            main(None, None)
        expected = "An unexpected error has occurred:"
        assert expected in caplog.text

0 comments on commit 477faef

Please sign in to comment.