From a57b167e5b679fd9d4a429562487f12a8dcf3c04 Mon Sep 17 00:00:00 2001 From: svittoz Date: Wed, 17 Apr 2024 14:56:13 +0000 Subject: [PATCH] pre-commit --- eds_scikit/period/stays.py | 29 +++++++------ eds_scikit/utils/sort_first_koalas.py | 4 +- tests/test_sort_first_koalas.py | 61 ++++++++++++++++++++------- 3 files changed, 65 insertions(+), 29 deletions(-) diff --git a/eds_scikit/period/stays.py b/eds_scikit/period/stays.py index 5d3532e4..8fcedc29 100644 --- a/eds_scikit/period/stays.py +++ b/eds_scikit/period/stays.py @@ -6,10 +6,9 @@ from eds_scikit.utils.checks import MissingConceptError, algo_checker, concept_checker from eds_scikit.utils.datetime_helpers import substract_datetime from eds_scikit.utils.framework import get_framework -from eds_scikit.utils.typing import DataFrame from eds_scikit.utils.sort_first_koalas import sort_values_first_koalas +from eds_scikit.utils.typing import DataFrame -import pandas as pd def cleaning( vo, @@ -71,6 +70,7 @@ def cleaning( return vo[~mask], vo[mask] + @concept_checker(concepts=["STAY_ID", "CONTIGUOUS_STAY_ID"]) def merge_visits( vo: DataFrame, @@ -291,23 +291,26 @@ def get_first( right_index=True, how="inner", ) - - first_visit = sort_values_first_koalas(merged, - by_cols=["visit_occurrence_id_2"], - cols=[flag_name, "visit_start_datetime_1"], - ascending=False - ).rename( + + first_visit = sort_values_first_koalas( + merged, + by_cols=["visit_occurrence_id_2"], + cols=[flag_name, "visit_start_datetime_1"], + ascending=False, + ).rename( columns={ "visit_occurrence_id_1": f"{concept_prefix}STAY_ID", "visit_occurrence_id_2": "visit_occurrence_id", - })[[f"{concept_prefix}STAY_ID", "visit_occurrence_id"]] - - + } + )[ + [f"{concept_prefix}STAY_ID", "visit_occurrence_id"] + ] + return merged, first_visit - merged, first_contiguous_visit = get_first(merged, contiguous_only=True) + merged, first_contiguous_visit = get_first(merged, contiguous_only=True) merged, first_visit = get_first(merged, contiguous_only=False) - + # Concatenating merge visits with previously discarded ones results = fw.concat( [ diff --git a/eds_scikit/utils/sort_first_koalas.py b/eds_scikit/utils/sort_first_koalas.py index 22355982..f4f93628 100644 --- a/eds_scikit/utils/sort_first_koalas.py +++ b/eds_scikit/utils/sort_first_koalas.py @@ -1,6 +1,8 @@ def sort_values_first_koalas(dataframe, by_cols, cols, ascending=True): for col in cols: dataframe_min_max = dataframe.groupby(by_cols, as_index=False)[col] - dataframe_min_max = dataframe_min_max.min() if ascending else dataframe_min_max.max() + dataframe_min_max = ( + dataframe_min_max.min() if ascending else dataframe_min_max.max() + ) dataframe = dataframe.merge(dataframe_min_max, on=[*by_cols, col], how="right") return dataframe diff --git a/tests/test_sort_first_koalas.py b/tests/test_sort_first_koalas.py index bdb4cdef..eac1cf3a 100644 --- a/tests/test_sort_first_koalas.py +++ b/tests/test_sort_first_koalas.py @@ -1,25 +1,53 @@ import pandas as pd -from eds_scikit.utils.sort_first_koalas import sort_values_first_koalas -from numpy import array import pytest +from numpy import array + from eds_scikit.utils import framework +from eds_scikit.utils.sort_first_koalas import sort_values_first_koalas from eds_scikit.utils.test_utils import assert_equal_no_order data = { - 'A': array(['X', 'Y', 'X', 'Y', 'Y', 'Z', 'X', 'Z', 'X', 'X', 'X', 'Z', 'Y', 'Z', 'Z', 'X', 'Y', 'Y', 'Y', 'Y'], dtype='