diff --git a/README.md b/README.md index ff8a58bd..b45abcf7 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,6 @@ # EDS-TeVa [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/aphp/edsteva/HEAD?labpath=notebooks%2Fsynthetic_data.ipynb)
- - - @@ -59,7 +56,7 @@ pip install edsteva We recommend pinning the library version in your projects, or use a strict package manager like [Poetry](https://python-poetry.org/). ``` -pip install edsteva==0.1.1 +pip install edsteva==0.1.2 ``` ## Example diff --git a/changelog.md b/changelog.md index d93f7fc5..da41ab59 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,5 @@ # Changelog -## v0.1.2 - 13-12-2022 +## v0.1.2 - 14-12-2022 - ConditionProbe computes the availability of administrative data related to visits with at least one ICD-10 code recorded. ## v0.1.1 - 03-12-2022 diff --git a/docs/index.md b/docs/index.md index 653c6cc3..6d514f2e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -10,9 +10,6 @@
-
-
-
@@ -97,7 +94,7 @@ color:green Successfully installed edsteva
We recommend pinning the library version in your projects, or use a strict package manager like [Poetry](https://python-poetry.org/).
```
-pip install edsteva==0.1.1
+pip install edsteva==0.1.2
```
## Working example: administrative records relative to visits
diff --git a/edsteva/__init__.py b/edsteva/__init__.py
index 8b911e6b..253455c1 100644
--- a/edsteva/__init__.py
+++ b/edsteva/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.1.1"
+__version__ = "0.1.2"
import importlib
diff --git a/edsteva/io/i2b2_mapping.py b/edsteva/io/i2b2_mapping.py
index 1a91b7e7..0053a7c6 100644
--- a/edsteva/io/i2b2_mapping.py
+++ b/edsteva/io/i2b2_mapping.py
@@ -16,7 +16,7 @@
def get_i2b2_table(
spark_session: SparkSession, db_name: str, db_source: str, table: str
-) -> SparkDataFrame:
+) -> SparkDataFrame: # pragma: no cover
"""
Retrieve a Spark table in i2b2 and transform it to fit with OMOP standard.
@@ -161,7 +161,9 @@ def get_i2b2_table(
return df
-def mapping_dict(mapping: Dict[str, str], Non_renseigne: str) -> FunctionUDF:
+def mapping_dict(
+ mapping: Dict[str, str], Non_renseigne: str
+) -> FunctionUDF: # pragma: no cover
"""
Returns a function that maps data according to a mapping dictionnary in a Spark DataFrame.
diff --git a/edsteva/io/synthetic/synthetic.py b/edsteva/io/synthetic/synthetic.py
index 056f697b..951e0151 100644
--- a/edsteva/io/synthetic/synthetic.py
+++ b/edsteva/io/synthetic/synthetic.py
@@ -1,10 +1,11 @@
from dataclasses import dataclass, field
from datetime import datetime
-from typing import Dict, Tuple
+from typing import Dict, List, Tuple, Union
import numpy as np
import pandas as pd
from databricks import koalas as ks
+from loguru import logger
from edsteva.io.synthetic.care_site import generate_care_site_tables
from edsteva.io.synthetic.utils import recursive_items
@@ -16,6 +17,8 @@
generate_before_t0,
)
+DataFrame = Union[ks.DataFrame, pd.DataFrame]
+
CARE_SITE_STRUCTURE = {
"Hôpital-1": {
"Pôle/DMU-11": {
@@ -323,14 +326,6 @@ def generate(self):
visit_detail = self._generate_visit_detail(visit_occurrence)
note = self._generate_note(hospital_ids, visit_occurrence)
- self.available_tables = [
- "care_site",
- "visit_occurrence",
- "condition_occurrence",
- "fact_relationship",
- "visit_detail",
- "note",
- ]
self.care_site = care_site
self.visit_occurrence = visit_occurrence
self.condition_occurrence = condition_occurrence
@@ -338,6 +333,8 @@ def generate(self):
self.visit_detail = visit_detail
self.note = note
+ self.list_available_tables()
+
if self.module == "koalas":
self.convert_to_koalas()
return self
@@ -523,3 +520,41 @@ def reset_to_pandas(self):
         self.visit_detail = self.visit_detail.to_pandas()
         self.note = self.note.to_pandas()
         self.module = "pandas"
+
+    def delete_table(self, table_name: str) -> None:
+        """
+        Delete a table from the dataset and refresh `available_tables`.
+
+        Parameters
+        ----------
+        table_name : str
+            Name of the attribute holding the table to delete. A name that
+            does not refer to a stored DataFrame is only logged.
+        """
+        # Check the instance dict rather than `hasattr`: `hasattr` is also
+        # true for methods and class-level attributes, on which `delattr`
+        # raises AttributeError, and would let non-table attributes such as
+        # `module` be deleted.
+        if isinstance(self.__dict__.get(table_name), DataFrame.__args__):
+            delattr(self, table_name)
+            logger.info("Table {} has been deleted", table_name)
+        else:
+            logger.info("Table {} does not exist", table_name)
+        self.list_available_tables()
+
+    def list_available_tables(self) -> List[str]:
+        """
+        Refresh and return the names of the DataFrame tables on the instance.
+
+        Returns
+        -------
+        List[str]
+            Names of the attributes holding a pandas or koalas DataFrame.
+        """
+        available_tables = [
+            key
+            for key, item in self.__dict__.items()
+            if isinstance(item, DataFrame.__args__)
+        ]
+        self.available_tables = available_tables
+        return available_tables
diff --git a/edsteva/probes/note.py b/edsteva/probes/note.py
index 4034f77a..4bda746d 100644
--- a/edsteva/probes/note.py
+++ b/edsteva/probes/note.py
@@ -85,7 +85,7 @@ def get_uf_visit(
visit_detail,
care_site,
care_site_relationship,
-):
+): # pragma: no cover
# Load Orbis note and Uf for Note
note_orbis = extra_data.orbis_document[
[
@@ -151,7 +151,7 @@ def get_uf_visit(
return uf_visit
-def get_pole_visit(uf_visit, care_site, care_site_relationship):
+def get_pole_visit(uf_visit, care_site, care_site_relationship): # pragma: no cover
pole_visit = convert_table_to_pole(
table=uf_visit.drop(columns=["care_site_short_name", "care_site_level"]),
diff --git a/edsteva/probes/utils.py b/edsteva/probes/utils.py
index 7a16009f..f1361079 100644
--- a/edsteva/probes/utils.py
+++ b/edsteva/probes/utils.py
@@ -24,6 +24,18 @@
def prepare_visit_occurrence(data, start_date, end_date, stay_types):
+ check_columns(
+ data.visit_occurrence,
+ required_columns=[
+ "visit_occurrence_id",
+ "visit_source_value",
+ "visit_start_datetime",
+ "care_site_id",
+ "row_status_source_value",
+ "visit_occurrence_source_value",
+ ],
+ df_name="visit_occurrence",
+ )
visit_occurrence = data.visit_occurrence[
[
"visit_occurrence_id",
diff --git a/edsteva/utils/checks.py b/edsteva/utils/checks.py
index 211f5dfb..4ea5c98b 100644
--- a/edsteva/utils/checks.py
+++ b/edsteva/utils/checks.py
@@ -38,7 +38,6 @@ class MissingTableError(Exception):
def __init__(
self,
required_tables: Union[List, dict],
- data_name: str = "",
):
if isinstance(required_tables, dict):
@@ -50,13 +49,7 @@ def __init__(
to_display_per_concept = [f"- {concept}" for concept in required_tables]
str_to_display = "\n".join(to_display_per_concept)
- if data_name:
- data_name = f" {data_name} "
- message = (
- f"The{data_name}Data is missing some tables, "
- "namely:\n"
- f"{str_to_display}"
- )
+ message = f"Data is missing some tables, namely:\n {str_to_display}"
super().__init__(message)
@@ -68,8 +61,8 @@ def check_columns(df: DataFrame, required_columns: List[str], df_name: str = "")
raise MissingColumnError(missing_columns, df_name=df_name)
-def check_tables(data: Data, required_tables: List[str], data_name: str = ""):
+def check_tables(data: Data, required_tables: List[str]):
present_tables = set(data.available_tables)
missing_tables = set(required_tables) - present_tables
if missing_tables:
- raise MissingTableError(missing_tables, data_name=data_name)
+ raise MissingTableError(missing_tables)
diff --git a/pyproject.toml b/pyproject.toml
index 757758fd..8c013522 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "edsteva"
-version = "0.1.1"
+version = "0.1.2"
description = "EDS-TeVa provides a set of tools that aims at modeling the adoption over time and across space of the Electronic Health Records."
authors = ["Adam Remaki