Skip to content

Commit

Permalink
fix test
Browse files: browse the repository at this point in the history
  • Loading branch information
svittoz committed Feb 8, 2024
1 parent 1453a0c commit 33f7a0e
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 43 deletions.
14 changes: 8 additions & 6 deletions eds_scikit/biology/utils/prepare_measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,14 @@ def prepare_measurement_table(
logger.info(
"Lazy preparation not available if convert_units=True. Computed table will be cached."
)
measurement.cache() if is_koalas(measurement) else None
conversion_table = (
to("koalas", get_conversion_table(measurement, concept_sets))
if is_koalas(measurement)
else get_conversion_table(measurement, concept_sets)
)
if is_koalas(measurement):
measurement.cache()
conversion_table = to(

Check warning on line 85 in eds_scikit/biology/utils/prepare_measurement.py

View check run for this annotation

Codecov / codecov/patch

eds_scikit/biology/utils/prepare_measurement.py#L84-L85

Added lines #L84 - L85 were not covered by tests
"koalas", get_conversion_table(measurement, concept_sets)
)
else:
conversion_table = get_conversion_table(measurement, concept_sets)

measurement = measurement.merge(
conversion_table, on=["concept_set", "unit_source_value"]
)
Expand Down
27 changes: 0 additions & 27 deletions eds_scikit/biology/utils/process_measurement.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from datetime import datetime
from typing import Tuple

from loguru import logger

from eds_scikit.utils.checks import check_columns
from eds_scikit.utils.framework import get_framework, to
from eds_scikit.utils.typing import DataFrame
Expand Down Expand Up @@ -54,10 +52,6 @@ def filter_measurement_by_date(
df=measurement, required_columns=["measurement_date"], df_name="measurment"
)

if "measurement_datetime" in measurement.columns:
measurement = measurement
# measurement = _select_adequate_date_column(measurement=measurement)

measurement.measurement_date = measurement.measurement_date.astype("datetime64[ns]")

measurement.dropna(subset=["measurement_date"], inplace=True)
Expand Down Expand Up @@ -171,24 +165,3 @@ def normalize_unit(measurement: DataFrame):
measurement["unit_source_value"].str.lower().fillna("Unknown")
)
return measurement


def _select_adequate_date_column(measurement: DataFrame):
missing_date = measurement.measurement_date.isna().sum()
if missing_date > 0:
missing_datetime = measurement.measurement_datetime.isna().sum()
if missing_date > missing_datetime:
measurement = measurement.drop(columns="measurement_date").rename(
columns={"measurement_datetime": "measurement_date"}
)
logger.warning(
"As the measurement_date column is not reliable ({} missing dates), it has been replaced by the measurement_datetime column ({} missing datetimes)",
missing_date,
missing_datetime,
)
missing_date = missing_datetime
else:
measurement = measurement.drop(columns="measurement_datetime")
else:
measurement = measurement.drop(columns="measurement_datetime")
return measurement
5 changes: 4 additions & 1 deletion eds_scikit/biology/viz/stats_summary.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from eds_scikit.utils.framework import to


def measurement_values_summary(measurement, category_cols, value_column, unit_column):
# Count measurements with no units

Expand Down Expand Up @@ -68,6 +71,6 @@ def measurement_values_summary(measurement, category_cols, value_column, unit_co
[*stats_summary.columns[::-1][:3], *stats_summary.columns[:-3]]
]

stats_summary = stats_summary.to_pandas()
stats_summary = to("pandas", stats_summary)

return stats_summary
41 changes: 32 additions & 9 deletions tests/test_biology.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import numpy as np
import pytest

from eds_scikit.biology import (
ConceptsSet,
bioclean,
measurement_values_summary,
plot_biology_summary,
prepare_measurement_table,
)
from eds_scikit.biology.utils.prepare_relationship import (
prepare_biology_relationship_table,
)
from eds_scikit.biology.utils.process_concepts import fetch_all_concepts_set
from eds_scikit.biology.utils.process_units import Units
from eds_scikit.datasets import load_biology_data
Expand Down Expand Up @@ -83,12 +88,17 @@ def test_units(data):
units = Units()

units.add_target_unit("g")

assert units.can_be_converted("g", "mg")
assert not units.can_be_converted("g", "l")

assert units.get_category("m/h/xxxx") == ["m", "Time", "Unkown"]
assert units.to_base("mm") == 0.001
assert np.isnan(units.to_base("xxxx"))

assert abs(units.convert_unit("L", "ml") - 1000) < 1e-6
assert abs(units.convert_unit("m/s", "m/h") - 3600.0) < 1e-6

assert np.isnan(units.convert_unit("m/s/xxxx", "m/h/s"))
units.add_conversion("mol", "g", 10)
assert abs(units.convert_unit("g", "mol") - 0.1) < 1e-6

Expand All @@ -101,16 +111,15 @@ def test_prepare_measurement(data, concepts_sets):
convert_units=True,
start_date=data.t_start,
end_date=data.t_end,
)

measurement = prepare_measurement_table(
data=data,
concept_sets=concepts_sets,
convert_units=False,
start_date=data.t_start,
end_date=data.t_end,
get_all_terminologies=False,
)

try:
prepare_biology_relationship_table(
data, concepts_sets, get_all_terminologies=False
)
except Exception:
pass
try:
plot_biology_summary(measurement)
except ValueError:
Expand All @@ -125,3 +134,17 @@ def test_prepare_measurement(data, concepts_sets):
)

plot_biology_summary(measurement, "value_as_number", terminologies=["GLIMS_ANABIO"])

measurement_values_summary(
measurement, ["concept_set"], "value_as_number", "unit_source_value"
)

data.convert_to_koalas()

measurement = prepare_measurement_table(
data=data,
concept_sets=concepts_sets,
convert_units=False,
start_date=data.t_start,
end_date=data.t_end,
)

0 comments on commit 33f7a0e

Please sign in to comment.