chore: Functions python refactoring #850

Open
wants to merge 9 commits into base: main
8 changes: 0 additions & 8 deletions .github/workflows/api-deployer.yml
@@ -201,10 +201,6 @@ jobs:
name: database_gen
path: api/src/database_gen/

- name: Copy to db models to functions directory
run: |
cp -R api/src/database_gen/ functions-python/database_gen
# api schema was generated and uploaded in api-build-test job above.
- uses: actions/download-artifact@v4
with:
@@ -249,10 +245,6 @@ jobs:
name: database_gen
path: api/src/database_gen/

- name: Copy to db models to functions directory
run: |
cp -R api/src/database_gen/ functions-python/database_gen
# api schema was generated and uploaded in api-build-test job above.
- uses: actions/download-artifact@v4
with:
2 changes: 1 addition & 1 deletion .github/workflows/datasets-batch-deployer.yml
@@ -119,7 +119,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: database_gen
path: functions-python/database_gen/
path: api/src/database_gen/

- name: Build python functions
run: |
4 changes: 3 additions & 1 deletion functions-python/.gitignore
@@ -1 +1,3 @@
.dist
.dist
shared
test_shared
3 changes: 2 additions & 1 deletion functions-python/batch_datasets/function_config.json
@@ -5,7 +5,8 @@
"timeout": 20,
"memory": "256Mi",
"trigger_http": true,
"include_folders": ["database_gen", "helpers", "dataset_service"],
"include_folders": ["helpers", "dataset_service"],
"include_api_folders": ["database_gen"],
"secret_environment_variables": [
{
"key": "FEEDS_DATABASE_URL"
2 changes: 1 addition & 1 deletion functions-python/batch_datasets/main_local_debug.py
@@ -12,7 +12,7 @@

# Usage:
# - python batch_datasets/main_local_debug.py
from src.main import batch_datasets
from main import batch_datasets
from dotenv import load_dotenv

# Load environment variables from .env.local
@@ -18,7 +18,7 @@
from unittest import mock
import pytest
from unittest.mock import Mock, patch, MagicMock
from batch_datasets.src.main import get_non_deprecated_feeds, batch_datasets
from main import get_non_deprecated_feeds, batch_datasets
from test_utils.database_utils import get_testing_session, default_db_url


@@ -39,8 +39,8 @@ def test_get_non_deprecated_feeds():
"FEEDS_LIMIT": "5",
},
)
@patch("batch_datasets.src.main.publish")
@patch("batch_datasets.src.main.get_pubsub_client")
@patch("main.publish")
@patch("main.get_pubsub_client")
def test_batch_datasets(mock_client, mock_publish):
mock_client.return_value = MagicMock()
with get_testing_session() as session:
@@ -64,7 +64,7 @@ def test_batch_datasets(mock_client, mock_publish):
]


@patch("batch_datasets.src.main.start_db_session")
@patch("main.start_db_session")
def test_batch_datasets_exception(start_db_session_mock):
exception_message = "Failure occurred"
start_db_session_mock.side_effect = Exception(exception_message)
3 changes: 2 additions & 1 deletion functions-python/batch_process_dataset/function_config.json
@@ -5,7 +5,8 @@
"timeout": 540,
"memory": "2Gi",
"trigger_http": true,
"include_folders": ["database_gen", "helpers", "dataset_service"],
"include_folders": ["helpers", "dataset_service"],
"include_api_folders": ["database_gen"],
"secret_environment_variables": [
{
"key": "FEEDS_DATABASE_URL"
@@ -6,7 +6,7 @@
from hashlib import sha256
from typing import Final
from unittest.mock import patch, MagicMock, Mock, mock_open
from batch_process_dataset.src.main import (
from main import (
DatasetProcessor,
DatasetFile,
process_dataset,
@@ -40,8 +40,8 @@ def create_cloud_event(mock_data):


class TestDatasetProcessor(unittest.TestCase):
@patch("batch_process_dataset.src.main.DatasetProcessor.upload_file_to_storage")
@patch("batch_process_dataset.src.main.DatasetProcessor.download_content")
@patch("main.DatasetProcessor.upload_file_to_storage")
@patch("main.DatasetProcessor.download_content")
def test_upload_dataset_diff_hash(
self, mock_download_url_content, upload_file_to_storage
):
@@ -80,8 +80,8 @@ def test_upload_dataset_diff_hash(
# Upload to storage is called twice, one for the latest and one for the timestamped one
self.assertEqual(upload_file_to_storage.call_count, 2)

@patch("batch_process_dataset.src.main.DatasetProcessor.upload_file_to_storage")
@patch("batch_process_dataset.src.main.DatasetProcessor.download_content")
@patch("main.DatasetProcessor.upload_file_to_storage")
@patch("main.DatasetProcessor.download_content")
def test_upload_dataset_same_hash(
self, mock_download_url_content, upload_file_to_storage
):
@@ -112,8 +112,8 @@ def test_upload_dataset_same_hash(
mock_blob.make_public.assert_not_called()
mock_download_url_content.assert_called_once()

@patch("batch_process_dataset.src.main.DatasetProcessor.upload_file_to_storage")
@patch("batch_process_dataset.src.main.DatasetProcessor.download_content")
@patch("main.DatasetProcessor.upload_file_to_storage")
@patch("main.DatasetProcessor.download_content")
def test_upload_dataset_not_zip(
self, mock_download_url_content, upload_file_to_storage
):
@@ -144,8 +144,8 @@ def test_upload_dataset_not_zip(
mock_blob.make_public.assert_not_called()
mock_download_url_content.assert_called_once()

@patch("batch_process_dataset.src.main.DatasetProcessor.upload_file_to_storage")
@patch("batch_process_dataset.src.main.DatasetProcessor.download_content")
@patch("main.DatasetProcessor.upload_file_to_storage")
@patch("main.DatasetProcessor.download_content")
def test_upload_dataset_download_exception(
self, mock_download_url_content, upload_file_to_storage
):
@@ -355,9 +355,9 @@ def test_process_no_change(self):
self.assertIsNone(result)
processor.create_dataset.assert_not_called()

@patch("batch_process_dataset.src.main.Logger")
@patch("batch_process_dataset.src.main.DatasetTraceService")
@patch("batch_process_dataset.src.main.DatasetProcessor")
@patch("main.Logger")
@patch("main.DatasetTraceService")
@patch("main.DatasetProcessor")
def test_process_dataset_normal_execution(
self, mock_dataset_processor, mock_dataset_trace, _
):
@@ -391,9 +391,9 @@ def test_process_dataset_normal_execution(
mock_dataset_processor.assert_called_once()
mock_dataset_processor_instance.process.assert_called_once()

@patch("batch_process_dataset.src.main.Logger")
@patch("batch_process_dataset.src.main.DatasetTraceService")
@patch("batch_process_dataset.src.main.DatasetProcessor")
@patch("main.Logger")
@patch("main.DatasetTraceService")
@patch("main.DatasetProcessor")
def test_process_dataset_exception_caught(
self, mock_dataset_processor, mock_dataset_trace, _
):
@@ -415,8 +415,8 @@ def test_process_dataset_exception_caught(
# Call the function
process_dataset(cloud_event)

@patch("batch_process_dataset.src.main.Logger")
@patch("batch_process_dataset.src.main.DatasetTraceService")
@patch("main.Logger")
@patch("main.DatasetTraceService")
def test_process_dataset_missing_stable_id(self, mock_dataset_trace, _):
db_url = os.getenv("TEST_FEEDS_DATABASE_URL", default=default_db_url)
os.environ["FEEDS_DATABASE_URL"] = db_url
3 changes: 2 additions & 1 deletion functions-python/big_query_ingestion/function_config.json
@@ -5,7 +5,8 @@
"timeout": 540,
"memory": "8Gi",
"trigger_http": false,
"include_folders": ["database_gen", "helpers"],
"include_folders": ["helpers"],
"include_api_folders": ["database_gen"],
"environment_variables": [],
"secret_environment_variables": [
{
@@ -1,6 +1,6 @@
import os

from ..common.bq_data_transfer import BigQueryDataTransfer
from common.bq_data_transfer import BigQueryDataTransfer


class BigQueryDataTransferGBFS(BigQueryDataTransfer):
@@ -1,6 +1,6 @@
import os

from ..common.bq_data_transfer import BigQueryDataTransfer
from common.bq_data_transfer import BigQueryDataTransfer


class BigQueryDataTransferGTFS(BigQueryDataTransfer):
4 changes: 2 additions & 2 deletions functions-python/big_query_ingestion/src/main.py
@@ -3,8 +3,8 @@
import functions_framework

from helpers.logger import Logger
from .gbfs.gbfs_big_query_ingest import BigQueryDataTransferGBFS
from .gtfs.gtfs_big_query_ingest import BigQueryDataTransferGTFS
from gbfs.gbfs_big_query_ingest import BigQueryDataTransferGBFS
from gtfs.gtfs_big_query_ingest import BigQueryDataTransferGTFS

logging.basicConfig(level=logging.INFO)

34 changes: 14 additions & 20 deletions functions-python/big_query_ingestion/tests/test_common.py
@@ -3,27 +3,27 @@

from google.cloud import bigquery

from big_query_ingestion.src.common.bq_data_transfer import BigQueryDataTransfer
from common.bq_data_transfer import BigQueryDataTransfer


class TestBigQueryDataTransfer(unittest.TestCase):
@patch("google.cloud.storage.Client")
@patch("big_query_ingestion.src.common.bq_data_transfer.bigquery.Client")
@patch("common.bq_data_transfer.bigquery.Client")
def setUp(self, mock_bq_client, mock_storage_client):
self.transfer = BigQueryDataTransfer()
self.transfer.schema_path = "fake_schema_path.json"
self.mock_bq_client = mock_bq_client
self.mock_storage_client = mock_storage_client

@patch("big_query_ingestion.src.common.bq_data_transfer.bigquery.DatasetReference")
@patch("common.bq_data_transfer.bigquery.DatasetReference")
def test_create_bigquery_dataset_exists(self, _):
self.mock_bq_client().get_dataset.return_value = True
self.transfer.create_bigquery_dataset()

self.mock_bq_client().get_dataset.assert_called_once()
self.mock_bq_client().create_dataset.assert_not_called()

@patch("big_query_ingestion.src.common.bq_data_transfer.bigquery.DatasetReference")
@patch("common.bq_data_transfer.bigquery.DatasetReference")
def test_create_bigquery_dataset_not_exists(self, _):
self.mock_bq_client().get_dataset.side_effect = Exception("Dataset not found")

@@ -32,9 +32,9 @@ def test_create_bigquery_dataset_not_exists(self, _):
self.mock_bq_client().get_dataset.assert_called_once()
self.mock_bq_client().create_dataset.assert_called_once()

@patch("big_query_ingestion.src.common.bq_data_transfer.load_json_schema")
@patch("big_query_ingestion.src.common.bq_data_transfer.json_schema_to_bigquery")
@patch("big_query_ingestion.src.common.bq_data_transfer.bigquery.DatasetReference")
@patch("common.bq_data_transfer.load_json_schema")
@patch("common.bq_data_transfer.json_schema_to_bigquery")
@patch("common.bq_data_transfer.bigquery.DatasetReference")
def test_create_bigquery_table_not_exists(
self, _, mock_json_schema_to_bigquery, mock_load_json_schema
):
@@ -53,7 +53,7 @@ def test_create_bigquery_table_not_exists(
mock_json_schema_to_bigquery.assert_called_once()
self.mock_bq_client().create_table.assert_called_once()

@patch("big_query_ingestion.src.common.bq_data_transfer.bigquery.DatasetReference")
@patch("common.bq_data_transfer.bigquery.DatasetReference")
def test_create_bigquery_table_exists(self, _):
self.mock_bq_client().get_table.return_value = True

@@ -62,7 +62,7 @@ def test_create_bigquery_table_exists(self, _):
self.mock_bq_client().get_table.assert_called_once()
self.mock_bq_client().create_table.assert_not_called()

@patch("big_query_ingestion.src.common.bq_data_transfer.bigquery.DatasetReference")
@patch("common.bq_data_transfer.bigquery.DatasetReference")
def test_load_data_to_bigquery(self, _):
mock_blob = MagicMock()
mock_blob.name = "file1.ndjson"
@@ -77,7 +77,7 @@ def test_load_data_to_bigquery(self, _):
self.mock_bq_client().load_table_from_uri.assert_called_once()
mock_load_job.result.assert_called_once()

@patch("big_query_ingestion.src.common.bq_data_transfer.bigquery.DatasetReference")
@patch("common.bq_data_transfer.bigquery.DatasetReference")
def test_load_data_to_bigquery_error(self, _):
mock_blob = MagicMock()
mock_blob.name = "file1.ndjson"
@@ -95,15 +95,9 @@ def test_load_data_to_bigquery_error(self, _):
log.output[0],
)

@patch(
"big_query_ingestion.src.common.bq_data_transfer.BigQueryDataTransfer.create_bigquery_dataset"
)
@patch(
"big_query_ingestion.src.common.bq_data_transfer.BigQueryDataTransfer.create_bigquery_table"
)
@patch(
"big_query_ingestion.src.common.bq_data_transfer.BigQueryDataTransfer.load_data_to_bigquery"
)
@patch("common.bq_data_transfer.BigQueryDataTransfer.create_bigquery_dataset")
@patch("common.bq_data_transfer.BigQueryDataTransfer.create_bigquery_table")
@patch("common.bq_data_transfer.BigQueryDataTransfer.load_data_to_bigquery")
def test_send_data_to_bigquery_success(
self, mock_load_data, mock_create_table, mock_create_dataset
):
@@ -116,7 +110,7 @@ def test_send_data_to_bigquery_success(
self.assertEqual(response, "Data successfully loaded to BigQuery")

@patch(
"big_query_ingestion.src.common.bq_data_transfer.BigQueryDataTransfer.create_bigquery_dataset",
"common.bq_data_transfer.BigQueryDataTransfer.create_bigquery_dataset",
side_effect=Exception("Dataset creation failed"),
)
def test_send_data_to_bigquery_failure(self, mock_create_dataset):
@@ -1,7 +1,7 @@
import unittest
from unittest.mock import patch

from big_query_ingestion.src.gbfs.gbfs_big_query_ingest import BigQueryDataTransferGBFS
from gbfs.gbfs_big_query_ingest import BigQueryDataTransferGBFS


class TestBigQueryDataTransferGBFS(unittest.TestCase):
@@ -1,7 +1,7 @@
import unittest
from unittest.mock import patch

from big_query_ingestion.src.gtfs.gtfs_big_query_ingest import BigQueryDataTransferGTFS
from gtfs.gtfs_big_query_ingest import BigQueryDataTransferGTFS


class TestBigQueryDataTransferGTFS(unittest.TestCase):
10 changes: 5 additions & 5 deletions functions-python/big_query_ingestion/tests/test_main.py
@@ -1,16 +1,16 @@
import unittest
from unittest.mock import patch

from big_query_ingestion.src.main import (
from main import (
ingest_data_to_big_query_gtfs,
ingest_data_to_big_query_gbfs,
)


class TestMain(unittest.TestCase):
@patch("big_query_ingestion.src.main.BigQueryDataTransferGTFS")
@patch("main.BigQueryDataTransferGTFS")
@patch("helpers.logger.Logger.init_logger")
@patch("big_query_ingestion.src.main.logging.info")
@patch("main.logging.info")
def test_ingest_data_to_big_query_gtfs(
self, mock_logging_info, mock_init_logger, mock_big_query_transfer_gtfs
):
@@ -27,9 +27,9 @@ def test_ingest_data_to_big_query_gtfs(
mock_instance.send_data_to_bigquery.assert_called_once()
self.assertEqual(response, ("Data successfully loaded to BigQuery", 200))

@patch("big_query_ingestion.src.main.BigQueryDataTransferGBFS")
@patch("main.BigQueryDataTransferGBFS")
@patch("helpers.logger.Logger.init_logger")
@patch("big_query_ingestion.src.main.logging.info")
@patch("main.logging.info")
def test_ingest_data_to_big_query_gbfs(
self, mock_logging_info, mock_init_logger, mock_biq_query_transfer_gbfs
):
@@ -2,7 +2,7 @@
from datetime import datetime
from unittest.mock import patch, MagicMock

from dataset_service.main import (
from main import (
DatasetTrace,
DatasetTraceService,
Status,
@@ -74,7 +74,7 @@ def test_save_batch_execution(self, mock_datastore_client):
mock_datastore_client.key.assert_called_once_with("batch_execution", "123")

@patch("google.cloud.datastore.Client")
@patch("dataset_service.main.DatasetTraceService._entity_to_dataset_trace")
@patch("main.DatasetTraceService._entity_to_dataset_trace")
def test_get_by_execution_and_stable_ids(
self, mock_entity_to_dataset_trace, mock_datastore_client
):