Skip to content

Commit

Permalink
test: 🧪 Improve tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Aremaki committed Dec 14, 2022
1 parent 4d4c624 commit 07a856b
Show file tree
Hide file tree
Showing 13 changed files with 162 additions and 35 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
# EDS-TeVa [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/aphp/edsteva/HEAD?labpath=notebooks%2Fsynthetic_data.ipynb)

<p align="center">
<a href="https://aphp.github.io/edsteva/latest/" target="_blank">
<img src="https://img.shields.io/github/workflow/status/aphp/edsteva/Tests%20and%20Linting?label=tests&style=flat" alt="Tests">
</a>
<a href="https://aphp.github.io/edsteva/latest/" target="_blank">
<img src="https://img.shields.io/github/workflow/status/aphp/edsteva/Documentation?label=docs&style=flat" alt="Documentation">
</a>
Expand Down Expand Up @@ -59,7 +56,7 @@ pip install edsteva
We recommend pinning the library version in your projects, or using a strict package manager like [Poetry](https://python-poetry.org/).

```
pip install edsteva==0.1.1
pip install edsteva==0.1.2
```
## Example

Expand Down
2 changes: 1 addition & 1 deletion changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Changelog
## v0.1.2 - 13-12-2022
## v0.1.2 - 14-12-2022

- ConditionProbe computes the availability of administrative data related to visits with at least one ICD-10 code recorded.
## v0.1.1 - 03-12-2022
Expand Down
5 changes: 1 addition & 4 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
</a>
</p>
<p align="center">
<a href="https://aphp.github.io/edsteva/latest/" target="_blank">
<img src="https://img.shields.io/github/workflow/status/aphp/edsteva/Tests%20and%20Linting?label=tests&style=flat" alt="Tests">
</a>
<a href="https://aphp.github.io/edsteva/latest/" target="_blank">
<img src="https://img.shields.io/github/workflow/status/aphp/edsteva/Documentation?label=docs&style=flat" alt="Documentation">
</a>
Expand Down Expand Up @@ -97,7 +94,7 @@ color:green Successfully installed edsteva
We recommend pinning the library version in your projects, or using a strict package manager like [Poetry](https://python-poetry.org/).

```
pip install edsteva==0.1.1
pip install edsteva==0.1.2
```
## Working example: administrative records relative to visits

Expand Down
2 changes: 1 addition & 1 deletion edsteva/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.1.1"
__version__ = "0.1.2"


import importlib
Expand Down
6 changes: 4 additions & 2 deletions edsteva/io/i2b2_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

def get_i2b2_table(
spark_session: SparkSession, db_name: str, db_source: str, table: str
) -> SparkDataFrame:
) -> SparkDataFrame: # pragma: no cover
"""
Retrieve a Spark table in i2b2 and transform it to fit with OMOP standard.
Expand Down Expand Up @@ -161,7 +161,9 @@ def get_i2b2_table(
return df


def mapping_dict(mapping: Dict[str, str], Non_renseigne: str) -> FunctionUDF:
def mapping_dict(
mapping: Dict[str, str], Non_renseigne: str
) -> FunctionUDF: # pragma: no cover
"""
Returns a function that maps data according to a mapping dictionary in a Spark DataFrame.
Expand Down
30 changes: 21 additions & 9 deletions edsteva/io/synthetic/synthetic.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, Tuple
from typing import Dict, List, Tuple, Union

import numpy as np
import pandas as pd
from databricks import koalas as ks
from loguru import logger

from edsteva.io.synthetic.care_site import generate_care_site_tables
from edsteva.io.synthetic.utils import recursive_items
Expand All @@ -16,6 +17,8 @@
generate_before_t0,
)

DataFrame = Union[ks.DataFrame, pd.DataFrame]

CARE_SITE_STRUCTURE = {
"Hôpital-1": {
"Pôle/DMU-11": {
Expand Down Expand Up @@ -323,21 +326,15 @@ def generate(self):
visit_detail = self._generate_visit_detail(visit_occurrence)
note = self._generate_note(hospital_ids, visit_occurrence)

self.available_tables = [
"care_site",
"visit_occurrence",
"condition_occurrence",
"fact_relationship",
"visit_detail",
"note",
]
self.care_site = care_site
self.visit_occurrence = visit_occurrence
self.condition_occurrence = condition_occurrence
self.fact_relationship = fact_relationship
self.visit_detail = visit_detail
self.note = note

self.list_available_tables()

if self.module == "koalas":
self.convert_to_koalas()
return self
Expand Down Expand Up @@ -523,3 +520,18 @@ def reset_to_pandas(self):
self.visit_detail = self.visit_detail.to_pandas()
self.note = self.note.to_pandas()
self.module = "pandas"

def delete_table(self, table_name: str) -> None:
    """
    Delete a table from the dataset and refresh ``available_tables``.

    Only instance attributes holding a pandas or koalas DataFrame are
    considered tables. Any other name (unknown attribute, method,
    configuration field) is reported as a non-existing table and left
    untouched.

    Parameters
    ----------
    table_name : str
        Name of the attribute holding the table to delete.
    """
    # Look the name up in the instance namespace only: `hasattr` would also
    # match methods and class-level attributes, on which `delattr` raises
    # AttributeError, and would let callers delete non-table fields.
    if isinstance(vars(self).get(table_name), DataFrame.__args__):
        delattr(self, table_name)
        logger.info("Table {} has been deleted", table_name)
    else:
        logger.info("Table {} does not exist", table_name)
    self.list_available_tables()

def list_available_tables(self) -> List[str]:
    """
    Refresh and return the names of the tables held by the instance.

    Every instance attribute whose value is a pandas or koalas DataFrame is
    reported as a table. The result is also stored in
    ``self.available_tables``.

    Returns
    -------
    List[str]
        Names of the attributes currently holding a DataFrame.
    """
    # `DataFrame` is a typing.Union alias; its `__args__` tuple holds the
    # concrete classes that `isinstance` can check against.
    available_tables = [
        key
        for key, item in self.__dict__.items()
        if isinstance(item, DataFrame.__args__)
    ]
    self.available_tables = available_tables
    return available_tables
4 changes: 2 additions & 2 deletions edsteva/probes/note.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def get_uf_visit(
visit_detail,
care_site,
care_site_relationship,
):
): # pragma: no cover
# Load Orbis note and Uf for Note
note_orbis = extra_data.orbis_document[
[
Expand Down Expand Up @@ -151,7 +151,7 @@ def get_uf_visit(
return uf_visit


def get_pole_visit(uf_visit, care_site, care_site_relationship):
def get_pole_visit(uf_visit, care_site, care_site_relationship): # pragma: no cover

pole_visit = convert_table_to_pole(
table=uf_visit.drop(columns=["care_site_short_name", "care_site_level"]),
Expand Down
12 changes: 12 additions & 0 deletions edsteva/probes/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@


def prepare_visit_occurrence(data, start_date, end_date, stay_types):
check_columns(
data.visit_occurrence,
required_columns=[
"visit_occurrence_id",
"visit_source_value",
"visit_start_datetime",
"care_site_id",
"row_status_source_value",
"visit_occurrence_source_value",
],
df_name="visit_occurrence",
)
visit_occurrence = data.visit_occurrence[
[
"visit_occurrence_id",
Expand Down
13 changes: 3 additions & 10 deletions edsteva/utils/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ class MissingTableError(Exception):
def __init__(
self,
required_tables: Union[List, dict],
data_name: str = "",
):

if isinstance(required_tables, dict):
Expand All @@ -50,13 +49,7 @@ def __init__(
to_display_per_concept = [f"- {concept}" for concept in required_tables]
str_to_display = "\n".join(to_display_per_concept)

if data_name:
data_name = f" {data_name} "
message = (
f"The{data_name}Data is missing some tables, "
"namely:\n"
f"{str_to_display}"
)
message = f"Data is missing some tables, namely:\n {str_to_display}"

super().__init__(message)

Expand All @@ -68,8 +61,8 @@ def check_columns(df: DataFrame, required_columns: List[str], df_name: str = "")
raise MissingColumnError(missing_columns, df_name=df_name)


def check_tables(data: Data, required_tables: List[str], data_name: str = ""):
def check_tables(data: Data, required_tables: List[str]):
present_tables = set(data.available_tables)
missing_tables = set(required_tables) - present_tables
if missing_tables:
raise MissingTableError(missing_tables, data_name=data_name)
raise MissingTableError(missing_tables)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "edsteva"
version = "0.1.1"
version = "0.1.2"
description = "EDS-TeVa provides a set of tools that aims at modeling the adoption over time and across space of the Electronic Health Records."
authors = ["Adam Remaki <[email protected]>", "Vicent Maladiere <[email protected]>", "Benoit Playe <[email protected]>", "Romain Bey <[email protected]>", "Paul Bernard <[email protected]>"]
keywords = ["OMOP", "Data Analysis", "Electronic health record"]
Expand Down
64 changes: 64 additions & 0 deletions tests/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pandas as pd
import pytest
from databricks import koalas as ks

from edsteva.utils import framework


@pytest.fixture()
def example_objects():
    """Provide one DataFrame and one Series for each framework, keyed by name."""
    pandas_objects = [
        pd.DataFrame({"col": [1, 2, 3]}),
        pd.Series([4, 5, 6]),
    ]
    koalas_objects = [
        ks.DataFrame({"val": [7, 8, 9]}),
        ks.Series([10, 11, 12]),
    ]
    return {"pandas": pandas_objects, "koalas": koalas_objects}


def test_identify_pandas(example_objects):
    """Pandas objects must be identified as pandas and not as koalas."""
    for pandas_obj in example_objects["pandas"]:
        seen_as_pandas = framework.is_pandas(pandas_obj)
        assert seen_as_pandas is True
        seen_as_koalas = framework.is_koalas(pandas_obj)
        assert seen_as_koalas is False
        detected = framework.get_framework(pandas_obj)
        assert detected is pd


def test_identify_koalas(example_objects):
    """Koalas objects must be identified as koalas and not as pandas."""
    for koalas_obj in example_objects["koalas"]:
        seen_as_pandas = framework.is_pandas(koalas_obj)
        assert seen_as_pandas is False
        seen_as_koalas = framework.is_koalas(koalas_obj)
        assert seen_as_koalas is True
        detected = framework.get_framework(koalas_obj)
        assert detected is ks


def test_framework_pandas(example_objects):
    """Converting to pandas keeps pandas objects as-is and converts koalas ones."""
    # Already-pandas inputs must come back as the very same object.
    for pandas_obj in example_objects["pandas"]:
        assert framework.pandas(pandas_obj) is pandas_obj

    # Koalas inputs must come back as pandas objects.
    for koalas_obj in example_objects["koalas"]:
        as_pandas = framework.pandas(koalas_obj)
        assert framework.is_pandas(as_pandas) is True


def test_framework_koalas(example_objects):
    """Converting to koalas converts pandas objects and keeps koalas ones as-is."""
    # Pandas inputs must come back as koalas objects.
    for pandas_obj in example_objects["pandas"]:
        as_koalas = framework.koalas(pandas_obj)
        assert framework.is_koalas(as_koalas) is True

    # Already-koalas inputs must come back as the very same object.
    for koalas_obj in example_objects["koalas"]:
        assert framework.koalas(koalas_obj) is koalas_obj


def test_unconvertible_objects():
    """Both converters must raise ValueError on objects that are not dataframes."""
    unsupported = [1, "coucou", {"a": [1, 2]}, [1, 2, 3], 2.5, ks, pd]
    # Same order as before: every object through `pandas`, then through `koalas`.
    for convert in (framework.pandas, framework.koalas):
        for candidate in unsupported:
            with pytest.raises(ValueError):
                convert(candidate)
11 changes: 11 additions & 0 deletions tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pandas as pd
import pytest

from edsteva import CACHE_DIR
from edsteva.io import SyntheticData
from edsteva.metrics import error, error_after_t0
from edsteva.models.rectangle_function import RectangleFunction
Expand Down Expand Up @@ -47,6 +48,16 @@ def test_step_function_visit_occurence():
start_date=data.t_min,
end_date=data.t_max,
)

# Test Cache saving
visit_model.save()
assert os.path.isfile(CACHE_DIR / "edsteva" / "models" / "stepfunction.pickle")
visit_model = StepFunction()
visit_model.load()
visit_model.delete()
assert not os.path.isfile(CACHE_DIR / "edsteva" / "models" / "stepfunction.pickle")

# Test target saving
visit_model.save(
path="test.pickle",
)
Expand Down
41 changes: 40 additions & 1 deletion tests/test_probes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,43 @@

import pytest

from edsteva import improve_performances
from edsteva import CACHE_DIR, improve_performances
from edsteva.io import SyntheticData
from edsteva.probes import ConditionProbe, NoteProbe, VisitProbe
from edsteva.utils.checks import MissingColumnError, MissingTableError

# Apply the "ignore" warnings filter to every test in this module.
pytestmark = pytest.mark.filterwarnings("ignore")


# NOTE(review): presumably configures the runtime used by the datasets below;
# its effect is not visible from this file -- confirm.
improve_performances()
# Synthetic datasets built once at import time and shared by the tests.
data_step = SyntheticData(seed=41, mode="step").generate()
data_rect = SyntheticData(seed=41, mode="rect").generate()
# Same seed and mode as `data_step`, but a separate instance:
# test_missing_checks mutates it (drops a column, deletes a table).
data_missing = SyntheticData(seed=41, mode="step").generate()


def test_missing_checks():
    """
    Check that ``VisitProbe.compute`` rejects invalid or incomplete data.

    Three cases are exercised in order on the module-level ``data_missing``
    dataset, which is mutated along the way: an object that is not a dataset
    (``TypeError``), a ``visit_occurrence`` table without its
    ``visit_occurrence_id`` column (``MissingColumnError``), and a dataset
    without its ``fact_relationship`` table (``MissingTableError``).
    """
    # Setup is kept outside every `pytest.raises` context so that only the
    # `compute` call can satisfy the expected exception.
    data_fake = [1, 2, 3]
    visit = VisitProbe()
    with pytest.raises(TypeError):
        visit.compute(
            data=data_fake,
        )

    data_missing.visit_occurrence = data_missing.visit_occurrence.drop(
        columns="visit_occurrence_id"
    )
    visit = VisitProbe()
    with pytest.raises(MissingColumnError):
        visit.compute(
            data=data_missing,
        )

    data_missing.delete_table("unknown_table")  # Test typo
    data_missing.delete_table("fact_relationship")
    visit = VisitProbe()
    with pytest.raises(MissingTableError):
        visit.compute(
            data=data_missing,
        )


params = [
dict(
Expand Down Expand Up @@ -80,7 +108,18 @@ def test_compute_visit_probe(data, params):
care_site_ids=params["care_site_ids"],
care_site_short_names=params["care_site_short_names"],
)

if params["test_save"]:
# Test Cache saving
visit.save()
assert os.path.isfile(CACHE_DIR / "edsteva" / "probes" / "visitprobe.pickle")
visit = VisitProbe()
visit.load()
visit.delete()
assert not os.path.isfile(
CACHE_DIR / "edsteva" / "probes" / "visitprobe.pickle"
)

visit.save(
path="test.pickle",
)
Expand Down

0 comments on commit 07a856b

Please sign in to comment.