From 07a856b7a753adca887d9d1f06fd075c9e5826ec Mon Sep 17 00:00:00 2001
From: Adam <adam.remaki@aphp.fr>
Date: Wed, 14 Dec 2022 11:37:49 +0100
Subject: [PATCH] test: :test_tube: Improve tests

---
 README.md                         |  5 +--
 changelog.md                      |  2 +-
 docs/index.md                     |  5 +--
 edsteva/__init__.py               |  2 +-
 edsteva/io/i2b2_mapping.py        |  6 ++-
 edsteva/io/synthetic/synthetic.py | 30 ++++++++++-----
 edsteva/probes/note.py            |  4 +-
 edsteva/probes/utils.py           | 12 ++++++
 edsteva/utils/checks.py           | 13 ++-----
 pyproject.toml                    |  2 +-
 tests/test_convert.py             | 64 +++++++++++++++++++++++++++++++
 tests/test_model.py               | 11 ++++++
 tests/test_probes.py              | 41 +++++++++++++++++++-
 13 files changed, 162 insertions(+), 35 deletions(-)
 create mode 100644 tests/test_convert.py
diff --git a/README.md b/README.md
index ff8a58bd..b45abcf7 100644
--- a/README.md
+++ b/README.md
@@ -11,9 +11,6 @@
 # EDS-TeVa [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/aphp/edsteva/HEAD?labpath=notebooks%2Fsynthetic_data.ipynb)
 
 <p align="center">
-<a href="https://aphp.github.io/edsteva/latest/" target="_blank">
-    <img src="https://img.shields.io/github/workflow/status/aphp/edsteva/Tests%20and%20Linting?label=tests&style=flat" alt="Tests">
-</a>
 <a href="https://aphp.github.io/edsteva/latest/" target="_blank">
     <img src="https://img.shields.io/github/workflow/status/aphp/edsteva/Documentation?label=docs&style=flat" alt="Documentation">
 </a>
@@ -59,7 +56,7 @@ pip install edsteva
 We recommend pinning the library version in your projects, or use a strict package manager like [Poetry](https://python-poetry.org/).
 
 ```
-pip install edsteva==0.1.1
+pip install edsteva==0.1.2
 ```
 ## Example
 
diff --git a/changelog.md b/changelog.md
index d93f7fc5..da41ab59 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,5 @@
 # Changelog
-## v0.1.2 - 13-12-2022
+## v0.1.2 - 14-12-2022
 
 - ConditionProbe computes the availability of administrative data related to visits with at least one ICD-10 code recorded.
 ## v0.1.1 - 03-12-2022
diff --git a/docs/index.md b/docs/index.md
index 653c6cc3..6d514f2e 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -10,9 +10,6 @@
 </a>
 </p>
 <p align="center">
-<a href="https://aphp.github.io/edsteva/latest/" target="_blank">
-    <img src="https://img.shields.io/github/workflow/status/aphp/edsteva/Tests%20and%20Linting?label=tests&style=flat" alt="Tests">
-</a>
 <a href="https://aphp.github.io/edsteva/latest/" target="_blank">
     <img src="https://img.shields.io/github/workflow/status/aphp/edsteva/Documentation?label=docs&style=flat" alt="Documentation">
 </a>
@@ -97,7 +94,7 @@ color:green Successfully installed edsteva
 We recommend pinning the library version in your projects, or use a strict package manager like [Poetry](https://python-poetry.org/).
 
 ```
-pip install edsteva==0.1.1
+pip install edsteva==0.1.2
 ```
 ## Working example: administrative records relative to visits
 
diff --git a/edsteva/__init__.py b/edsteva/__init__.py
index 8b911e6b..253455c1 100644
--- a/edsteva/__init__.py
+++ b/edsteva/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.1.1"
+__version__ = "0.1.2"
 
 
 import importlib
diff --git a/edsteva/io/i2b2_mapping.py b/edsteva/io/i2b2_mapping.py
index 1a91b7e7..0053a7c6 100644
--- a/edsteva/io/i2b2_mapping.py
+++ b/edsteva/io/i2b2_mapping.py
@@ -16,7 +16,7 @@
 
 def get_i2b2_table(
     spark_session: SparkSession, db_name: str, db_source: str, table: str
-) -> SparkDataFrame:
+) -> SparkDataFrame:  # pragma: no cover
     """
     Retrieve a Spark table in i2b2 and transform it to fit with OMOP standard.
 
@@ -161,7 +161,9 @@ def get_i2b2_table(
     return df
 
 
-def mapping_dict(mapping: Dict[str, str], Non_renseigne: str) -> FunctionUDF:
+def mapping_dict(
+    mapping: Dict[str, str], Non_renseigne: str
+) -> FunctionUDF:  # pragma: no cover
     """
     Returns a function that maps data according to a mapping dictionnary in a Spark DataFrame.
 
diff --git a/edsteva/io/synthetic/synthetic.py b/edsteva/io/synthetic/synthetic.py
index 056f697b..951e0151 100644
--- a/edsteva/io/synthetic/synthetic.py
+++ b/edsteva/io/synthetic/synthetic.py
@@ -1,10 +1,11 @@
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Dict, Tuple
+from typing import Dict, List, Tuple, Union
 
 import numpy as np
 import pandas as pd
 from databricks import koalas as ks
+from loguru import logger
 
 from edsteva.io.synthetic.care_site import generate_care_site_tables
 from edsteva.io.synthetic.utils import recursive_items
@@ -16,6 +17,8 @@
     generate_before_t0,
 )
 
+DataFrame = Union[ks.DataFrame, pd.DataFrame]
+
 CARE_SITE_STRUCTURE = {
     "Hôpital-1": {
         "Pôle/DMU-11": {
@@ -323,14 +326,6 @@ def generate(self):
         visit_detail = self._generate_visit_detail(visit_occurrence)
         note = self._generate_note(hospital_ids, visit_occurrence)
 
-        self.available_tables = [
-            "care_site",
-            "visit_occurrence",
-            "condition_occurrence",
-            "fact_relationship",
-            "visit_detail",
-            "note",
-        ]
         self.care_site = care_site
         self.visit_occurrence = visit_occurrence
         self.condition_occurrence = condition_occurrence
@@ -338,6 +333,8 @@ def generate(self):
         self.visit_detail = visit_detail
         self.note = note
 
+        self.list_available_tables()
+
         if self.module == "koalas":
             self.convert_to_koalas()
         return self
@@ -523,3 +520,18 @@ def reset_to_pandas(self):
         self.visit_detail = self.visit_detail.to_pandas()
         self.note = self.note.to_pandas()
         self.module = "pandas"
+
+    def delete_table(self, table_name: str) -> None:
+        if isinstance(vars(self).get(table_name), DataFrame.__args__):  # tables only
+            delattr(self, table_name)
+            logger.info("Table {} has been deleted", table_name)
+        else:  # absent, or a method/setting that must never be removed
+            logger.info("Table {} does not exist", table_name)
+        self.list_available_tables()  # keep `available_tables` in sync
+
+    def list_available_tables(self) -> List[str]:
+        """Refresh `available_tables` with the DataFrame attributes and return it."""
+        frames = DataFrame.__args__  # concrete classes of the `DataFrame` Union
+        tables = [name for name, obj in vars(self).items() if isinstance(obj, frames)]
+        self.available_tables = tables
+        return tables
diff --git a/edsteva/probes/note.py b/edsteva/probes/note.py
index 4034f77a..4bda746d 100644
--- a/edsteva/probes/note.py
+++ b/edsteva/probes/note.py
@@ -85,7 +85,7 @@ def get_uf_visit(
     visit_detail,
     care_site,
     care_site_relationship,
-):
+):  # pragma: no cover
     # Load Orbis note and Uf for Note
     note_orbis = extra_data.orbis_document[
         [
@@ -151,7 +151,7 @@ def get_uf_visit(
     return uf_visit
 
 
-def get_pole_visit(uf_visit, care_site, care_site_relationship):
+def get_pole_visit(uf_visit, care_site, care_site_relationship):  # pragma: no cover
 
     pole_visit = convert_table_to_pole(
         table=uf_visit.drop(columns=["care_site_short_name", "care_site_level"]),
diff --git a/edsteva/probes/utils.py b/edsteva/probes/utils.py
index 7a16009f..f1361079 100644
--- a/edsteva/probes/utils.py
+++ b/edsteva/probes/utils.py
@@ -24,6 +24,18 @@
 
 
 def prepare_visit_occurrence(data, start_date, end_date, stay_types):
+    check_columns(
+        data.visit_occurrence,
+        required_columns=[
+            "visit_occurrence_id",
+            "visit_source_value",
+            "visit_start_datetime",
+            "care_site_id",
+            "row_status_source_value",
+            "visit_occurrence_source_value",
+        ],
+        df_name="visit_occurrence",
+    )
     visit_occurrence = data.visit_occurrence[
         [
             "visit_occurrence_id",
diff --git a/edsteva/utils/checks.py b/edsteva/utils/checks.py
index 211f5dfb..4ea5c98b 100644
--- a/edsteva/utils/checks.py
+++ b/edsteva/utils/checks.py
@@ -38,7 +38,6 @@ class MissingTableError(Exception):
     def __init__(
         self,
         required_tables: Union[List, dict],
-        data_name: str = "",
     ):
 
         if isinstance(required_tables, dict):
@@ -50,13 +49,7 @@ def __init__(
             to_display_per_concept = [f"- {concept}" for concept in required_tables]
         str_to_display = "\n".join(to_display_per_concept)
 
-        if data_name:
-            data_name = f" {data_name} "
-        message = (
-            f"The{data_name}Data is missing some tables, "
-            "namely:\n"
-            f"{str_to_display}"
-        )
+        message = f"Data is missing some tables, namely:\n {str_to_display}"
 
         super().__init__(message)
 
@@ -68,8 +61,8 @@ def check_columns(df: DataFrame, required_columns: List[str], df_name: str = "")
         raise MissingColumnError(missing_columns, df_name=df_name)
 
 
-def check_tables(data: Data, required_tables: List[str], data_name: str = ""):
+def check_tables(data: Data, required_tables: List[str]):
     present_tables = set(data.available_tables)
     missing_tables = set(required_tables) - present_tables
     if missing_tables:
-        raise MissingTableError(missing_tables, data_name=data_name)
+        raise MissingTableError(missing_tables)
diff --git a/pyproject.toml b/pyproject.toml
index 757758fd..8c013522 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "edsteva"
-version = "0.1.1"
+version = "0.1.2"
 description = "EDS-TeVa provides a set of tools that aims at modeling the adoption over time and across space of the Electronic Health Records."
 authors = ["Adam Remaki <adam.remaki@aphp.fr>", "Vicent Maladiere <vincent.maladiere-ext@aphp.fr>", "Benoit Playe <benoit.playe@aphp.fr>", "Romain Bey <romain.bey@aphp.fr>", "Paul Bernard <paul.bernard@aphp.fr>"]
 keywords = ["OMOP", "Data Analysis", "Electronic health record"]
diff --git a/tests/test_convert.py b/tests/test_convert.py
new file mode 100644
index 00000000..5b514e9b
--- /dev/null
+++ b/tests/test_convert.py
@@ -0,0 +1,64 @@
+import pandas as pd
+import pytest
+from databricks import koalas as ks
+
+from edsteva.utils import framework
+
+
+@pytest.fixture()
+def example_objects():
+    """
+    Build sample objects for each supported framework.
+
+    Returns a dict mapping "pandas" and "koalas" to a list holding one
+    DataFrame and one Series of that framework.
+    """
+    pandas_objects = [pd.DataFrame({"col": [1, 2, 3]}), pd.Series([4, 5, 6])]
+    koalas_objects = [ks.DataFrame({"val": [7, 8, 9]}), ks.Series([10, 11, 12])]
+
+    return {"pandas": pandas_objects, "koalas": koalas_objects}
+
+
+def test_identify_pandas(example_objects):
+    for pandas_obj in example_objects["pandas"]:  # a DataFrame, then a Series
+        assert framework.is_pandas(pandas_obj) is True
+        assert framework.is_koalas(pandas_obj) is False
+        assert framework.get_framework(pandas_obj) is pd  # the module itself
+
+
+def test_identify_koalas(example_objects):
+    for koalas_obj in example_objects["koalas"]:  # a DataFrame, then a Series
+        assert framework.is_pandas(koalas_obj) is False
+        assert framework.is_koalas(koalas_obj) is True
+        assert framework.get_framework(koalas_obj) is ks  # the module itself
+
+
+def test_framework_pandas(example_objects):
+    """Pandas inputs come back untouched; koalas inputs are converted."""
+    for native in example_objects["pandas"]:
+        # Already pandas: the very same object must be returned.
+        assert framework.pandas(native) is native
+    for foreign in example_objects["koalas"]:
+        # Koalas input must come back as a pandas object.
+        assert framework.is_pandas(framework.pandas(foreign)) is True
+
+
+def test_framework_koalas(example_objects):
+    """Koalas inputs come back untouched; pandas inputs are converted."""
+    for foreign in example_objects["pandas"]:
+        # Pandas input must come back as a koalas object.
+        assert framework.is_koalas(framework.koalas(foreign)) is True
+    for native in example_objects["koalas"]:
+        # Already koalas: the very same object must be returned.
+        assert framework.koalas(native) is native
+
+
+def test_unconvertible_objects():
+    """Objects that are neither pandas nor koalas make both converters raise."""
+    invalid_inputs = (1, "coucou", {"a": [1, 2]}, [1, 2, 3], 2.5, ks, pd)
+    # Every input goes through `framework.pandas` first, then through
+    # `framework.koalas`; each single call must raise a ValueError.
+    for convert in (framework.pandas, framework.koalas):
+        for candidate in invalid_inputs:
+            with pytest.raises(ValueError):
+                convert(candidate)
diff --git a/tests/test_model.py b/tests/test_model.py
index bcb38311..d0c6b672 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -3,6 +3,7 @@
 import pandas as pd
 import pytest
 
+from edsteva import CACHE_DIR
 from edsteva.io import SyntheticData
 from edsteva.metrics import error, error_after_t0
 from edsteva.models.rectangle_function import RectangleFunction
@@ -47,6 +48,16 @@ def test_step_function_visit_occurence():
         start_date=data.t_min,
         end_date=data.t_max,
     )
+
+    # Test Cache saving
+    visit_model.save()
+    assert os.path.isfile(CACHE_DIR / "edsteva" / "models" / "stepfunction.pickle")
+    visit_model = StepFunction()
+    visit_model.load()
+    visit_model.delete()
+    assert not os.path.isfile(CACHE_DIR / "edsteva" / "models" / "stepfunction.pickle")
+
+    # Test target saving
     visit_model.save(
         path="test.pickle",
     )
diff --git a/tests/test_probes.py b/tests/test_probes.py
index 6c22f6bd..65ba8c5c 100644
--- a/tests/test_probes.py
+++ b/tests/test_probes.py
@@ -3,15 +3,43 @@
 
 import pytest
 
-from edsteva import improve_performances
+from edsteva import CACHE_DIR, improve_performances
 from edsteva.io import SyntheticData
 from edsteva.probes import ConditionProbe, NoteProbe, VisitProbe
+from edsteva.utils.checks import MissingColumnError, MissingTableError
 
 pytestmark = pytest.mark.filterwarnings("ignore")
 
+
 improve_performances()
 data_step = SyntheticData(seed=41, mode="step").generate()
 data_rect = SyntheticData(seed=41, mode="rect").generate()
+data_missing = SyntheticData(seed=41, mode="step").generate()
+
+
+def test_missing_checks():
+    """Check that `VisitProbe.compute` rejects invalid or incomplete data."""
+    # Setup is kept outside the `pytest.raises` blocks so that only the
+    # `compute` call itself can satisfy the expected exception.
+    visit = VisitProbe()
+    with pytest.raises(TypeError):
+        visit.compute(data=[1, 2, 3])  # a plain list instead of a data object
+
+    # Drop a column from `visit_occurrence`: expect a missing-column error.
+    data_missing.visit_occurrence = data_missing.visit_occurrence.drop(
+        columns="visit_occurrence_id"
+    )
+    visit = VisitProbe()
+    with pytest.raises(MissingColumnError):
+        visit.compute(data=data_missing)
+
+    # Remove the `fact_relationship` table: expect a missing-table error.
+    data_missing.delete_table("unknown_table")  # Test typo
+    data_missing.delete_table("fact_relationship")
+    visit = VisitProbe()
+    with pytest.raises(MissingTableError):
+        visit.compute(data=data_missing)
+
 
 params = [
     dict(
@@ -80,7 +108,18 @@ def test_compute_visit_probe(data, params):
         care_site_ids=params["care_site_ids"],
         care_site_short_names=params["care_site_short_names"],
     )
+
     if params["test_save"]:
+        # Test Cache saving
+        visit.save()
+        assert os.path.isfile(CACHE_DIR / "edsteva" / "probes" / "visitprobe.pickle")
+        visit = VisitProbe()
+        visit.load()
+        visit.delete()
+        assert not os.path.isfile(
+            CACHE_DIR / "edsteva" / "probes" / "visitprobe.pickle"
+        )
+
         visit.save(
             path="test.pickle",
         )