Skip to content

Commit

Permalink
chk test validate
Browse files Browse the repository at this point in the history
  • Loading branch information
fvankrieken committed Dec 20, 2024
1 parent 0668158 commit 2e67f53
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 2 deletions.
37 changes: 37 additions & 0 deletions dcpy/test/lifecycle/ingest/shared.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,30 @@
from datetime import datetime
from pathlib import Path

from dcpy.models.connectors.edm.publishing import GisDataset
from dcpy.models.connectors.edm.recipes import Dataset
from dcpy.models import file, library
from dcpy.models.connectors import socrata, web
from dcpy.models.lifecycle.ingest import (
LocalFileSource,
ScriptSource,
S3Source,
DEPublished,
DatasetAttributes,
ArchivalMetadata,
Ingestion,
Config,
)
from dcpy.utils.metadata import get_run_details
from dcpy.test.conftest import RECIPES_BUCKET

# Fixture locations, resolved relative to the directory containing this module.
RESOURCES = Path(__file__).parent.joinpath("resources")
TEMPLATE_DIR = RESOURCES.joinpath("templates")
TEST_DATA_DIR = "test_data"
TEST_OUTPUT = RESOURCES.joinpath(TEST_DATA_DIR, "output.parquet")

# Name/version pair identifying the dataset used throughout these tests.
TEST_DATASET_NAME = "test_dataset"
FAKE_VERSION = "20240101"
TEST_DATASET = Dataset(id=TEST_DATASET_NAME, version=FAKE_VERSION)


class Sources:
Expand All @@ -39,6 +49,33 @@ class Sources:
)


# Ingest Config for the test dataset: CSV input from the local-file source,
# a fixed archival timestamp, and run details captured when this module is
# imported.
BASIC_CONFIG = Config(
    id=TEST_DATASET_NAME,
    version=FAKE_VERSION,
    attributes=DatasetAttributes(name=TEST_DATASET_NAME),
    archival=ArchivalMetadata(
        archival_timestamp=datetime(2024, 1, 1),
        raw_filename="dummy.txt",
        acl="public-read",
    ),
    ingestion=Ingestion(
        source=Sources.local_file,
        file_format=file.Csv(type="csv"),
    ),
    run_details=get_run_details(),
)

# Library-style Config for the same dataset name and version as above, with a
# default source section and a destination whose geometry is "NONE"/"NONE".
# Execution details are captured when this module is imported.
BASIC_LIBRARY_CONFIG = library.Config(
    dataset=library.DatasetDefinition(
        name=TEST_DATASET_NAME,
        version=FAKE_VERSION,
        acl="public-read",
        source=library.DatasetDefinition.SourceSection(),
        destination=library.DatasetDefinition.DestinationSection(
            geometry=library.GeometryType(SRS="NONE", type="NONE"),
        ),
    ),
    execution_details=get_run_details(),
)


SOURCE_FILENAMES = [
(Sources.local_file, "dummy.txt"),
(Sources.gis, f"{TEST_DATASET_NAME}.zip"),
Expand Down
Empty file.
57 changes: 55 additions & 2 deletions dcpy/test/lifecycle/ingest/test_validate.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,68 @@
from io import BytesIO
import json
import pytest
import yaml

from dcpy.test.conftest import RECIPES_BUCKET
from dcpy.models.lifecycle.ingest import Template
from dcpy.lifecycle.ingest import configure, transform
from dcpy.utils import s3
from dcpy.connectors.edm import recipes
from dcpy.lifecycle.ingest import configure, transform, validate

from .shared import (
TEST_DATASET,
TEST_OUTPUT,
BASIC_CONFIG,
BASIC_LIBRARY_CONFIG,
)


@pytest.mark.parametrize(
    # Sorted so the collected test order does not depend on filesystem
    # enumeration order.
    "dataset",
    sorted(t.name for t in configure.TEMPLATE_DIR.glob("*")),
)
def test_validate_all_templates(dataset):
    """Validate the processing steps declared in one ingest template.

    Parametrized over the name of every entry in ``configure.TEMPLATE_DIR``.
    The file is parsed as YAML, loaded into a ``Template``, and its
    processing steps are handed to ``transform.validate_processing_steps``.
    Any exception raised while parsing, constructing the model, or
    validating fails the test.

    Args:
        dataset: File name of the template inside ``configure.TEMPLATE_DIR``.
    """
    # Read explicitly as UTF-8 rather than the platform default encoding.
    with open(configure.TEMPLATE_DIR / dataset, "r", encoding="utf-8") as f:
        s = yaml.safe_load(f)
    template = Template(**s)
    transform.validate_processing_steps(
        template.id, template.ingestion.processing_steps
    )


class TestValidateAgainstExistingVersions:
    """Tests for ``validate.validate_against_existing_versions``.

    Every test requests the ``create_buckets`` fixture (defined outside this
    file; presumably it provisions the test buckets -- confirm in conftest).
    """

    def test_new(self, create_buckets):
        """Without any archiving done by the test, the action is ``push``."""
        action = validate.validate_against_existing_versions(
            TEST_DATASET, TEST_OUTPUT
        )
        assert action == validate.ArchiveAction.push

    def test_existing_library(self, create_buckets):
        """A version whose config.json is a library config yields ``do_nothing``."""
        dataset = BASIC_LIBRARY_CONFIG.sparse_dataset

        # Upload the serialized library config as the dataset's config.json.
        payload = json.dumps(BASIC_LIBRARY_CONFIG.model_dump(mode="json")).encode()
        config_key = f"{recipes.s3_folder_path(dataset)}/config.json"
        s3.upload_file_obj(
            BytesIO(payload),
            RECIPES_BUCKET,
            config_key,
            BASIC_LIBRARY_CONFIG.dataset.acl,
        )
        assert recipes.exists(dataset)

        action = validate.validate_against_existing_versions(dataset, TEST_OUTPUT)
        assert action == validate.ArchiveAction.do_nothing

    def test_existing(self, create_buckets):
        """Re-validating the file that was archived yields ``update_freshness``."""
        dataset = BASIC_CONFIG.dataset
        recipes.archive_dataset(BASIC_CONFIG, TEST_OUTPUT)
        assert recipes.exists(dataset)

        action = validate.validate_against_existing_versions(dataset, TEST_OUTPUT)
        assert action == validate.ArchiveAction.update_freshness

    def test_existing_data_diffs(self, create_buckets):
        """Validating a different file for an archived version raises."""
        dataset = BASIC_CONFIG.dataset
        recipes.archive_dataset(BASIC_CONFIG, TEST_OUTPUT)
        assert recipes.exists(dataset)

        # Sibling file of TEST_OUTPUT, used as the non-matching input.
        other_file = TEST_OUTPUT.parent / "test.parquet"
        with pytest.raises(FileExistsError):
            validate.validate_against_existing_versions(dataset, other_file)

0 comments on commit 2e67f53

Please sign in to comment.