From a3607f43727f75447f8d60219cb4279c6816a4b5 Mon Sep 17 00:00:00 2001 From: Matthew McDermott Date: Sun, 1 Sep 2024 14:26:57 -0400 Subject: [PATCH 1/5] Rearranging things --- pyproject.toml | 4 ++-- src/{MEDS-DEV => MEDS_DEV}/__init__.py | 0 src/MEDS_DEV/configs/_ACES_MD.yaml | 23 +++++++++++++++++++ .../datasets/MIMIC-IV/README.md | 0 .../datasets/MIMIC-IV/predicates.yaml | 0 src/{MEDS-DEV => MEDS_DEV}/datasets/README.md | 0 src/{MEDS-DEV => MEDS_DEV}/tasks/README.md | 0 .../tasks/criteria/README.md | 0 .../tasks/criteria/abnormal_lab/README.md | 0 .../reduced_echo_derived_LVEF_via_ECG.yaml | 0 .../criteria/disease_progression/README.md | 0 .../disease_progression/diabetes/README.md | 0 .../diabetes/ckd_post_kidney_labs.yaml | 0 .../tasks/criteria/mortality/README.md | 0 .../tasks/criteria/mortality/in_icu/README.md | 0 .../criteria/mortality/in_icu/first_24h.yaml | 0 .../tasks/criteria/phenotyping/README.md | 0 .../phenotyping/outpatient/MI/1y-5y.yaml | 0 .../phenotyping/outpatient/MI/README.md | 0 .../criteria/phenotyping/outpatient/README.md | 0 .../tasks/criteria/readmission/README.md | 0 .../readmission/general_hospital/30d.yaml | 0 .../readmission/general_hospital/README.md | 0 23 files changed, 25 insertions(+), 2 deletions(-) rename src/{MEDS-DEV => MEDS_DEV}/__init__.py (100%) create mode 100644 src/MEDS_DEV/configs/_ACES_MD.yaml rename src/{MEDS-DEV => MEDS_DEV}/datasets/MIMIC-IV/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/datasets/MIMIC-IV/predicates.yaml (100%) rename src/{MEDS-DEV => MEDS_DEV}/datasets/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/abnormal_lab/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/abnormal_lab/reduced_echo_derived_LVEF_via_ECG.yaml (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/disease_progression/README.md (100%) rename src/{MEDS-DEV => 
MEDS_DEV}/tasks/criteria/disease_progression/diabetes/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/disease_progression/diabetes/ckd_post_kidney_labs.yaml (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/mortality/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/mortality/in_icu/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/mortality/in_icu/first_24h.yaml (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/phenotyping/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/phenotyping/outpatient/MI/1y-5y.yaml (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/phenotyping/outpatient/MI/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/phenotyping/outpatient/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/readmission/README.md (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/readmission/general_hospital/30d.yaml (100%) rename src/{MEDS-DEV => MEDS_DEV}/tasks/criteria/readmission/general_hospital/README.md (100%) diff --git a/pyproject.toml b/pyproject.toml index a0d0091..cc51b8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=64", "setuptools-scm>=8.0", "wheel"] build-backend = "setuptools.build_meta" [project] -name = "MEDS-DEV" +name = "MEDS_DEV" dynamic = ["version"] authors = [ {name="Matthew B. A. 
McDermott", email="matthew_mcdermott@hms.harvard.edu"}, @@ -28,7 +28,7 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] -dependencies = ["meds==0.3", "es-aces==0.3.0"] +dependencies = ["meds==0.3.3", "es-aces==0.5.0"] [tool.setuptools_scm] diff --git a/src/MEDS-DEV/__init__.py b/src/MEDS_DEV/__init__.py similarity index 100% rename from src/MEDS-DEV/__init__.py rename to src/MEDS_DEV/__init__.py diff --git a/src/MEDS_DEV/configs/_ACES_MD.yaml b/src/MEDS_DEV/configs/_ACES_MD.yaml new file mode 100644 index 0000000..7302772 --- /dev/null +++ b/src/MEDS_DEV/configs/_ACES_MD.yaml @@ -0,0 +1,23 @@ +defaults: + - _aces + - data: sharded + - _self_ + +dataset_name: ${oc.env:MEDS_DATASET_NAME} +task_name: ${oc.env:MEDS_TASK_NAME} +output_dir: "${oc.env:MEDS_ROOT_DIR}/task_labels" + +# TODO: find a nice way to have this be inferred automatically +MEDS_DEV_dir: "${oc.env:MEDS_DEV_REPO_DIR}" + +data: + standard: meds + root: "${oc.env:MEDS_ROOT_DIR}/data" + +# Cohort directory and name: used for automatically loading configs, saving results, and logging. 
+cohort_dir: ${output_dir} +cohort_name: ${task_name} + +# Path to the task configuration file +config_path: ${MEDS_DEV_dir}/tasks/criteria/${task_name}.yaml +predicates_path: ${MEDS_DEV_dir}/datasets/${dataset_name}/predicates.yaml diff --git a/src/MEDS-DEV/datasets/MIMIC-IV/README.md b/src/MEDS_DEV/datasets/MIMIC-IV/README.md similarity index 100% rename from src/MEDS-DEV/datasets/MIMIC-IV/README.md rename to src/MEDS_DEV/datasets/MIMIC-IV/README.md diff --git a/src/MEDS-DEV/datasets/MIMIC-IV/predicates.yaml b/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml similarity index 100% rename from src/MEDS-DEV/datasets/MIMIC-IV/predicates.yaml rename to src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml diff --git a/src/MEDS-DEV/datasets/README.md b/src/MEDS_DEV/datasets/README.md similarity index 100% rename from src/MEDS-DEV/datasets/README.md rename to src/MEDS_DEV/datasets/README.md diff --git a/src/MEDS-DEV/tasks/README.md b/src/MEDS_DEV/tasks/README.md similarity index 100% rename from src/MEDS-DEV/tasks/README.md rename to src/MEDS_DEV/tasks/README.md diff --git a/src/MEDS-DEV/tasks/criteria/README.md b/src/MEDS_DEV/tasks/criteria/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/README.md rename to src/MEDS_DEV/tasks/criteria/README.md diff --git a/src/MEDS-DEV/tasks/criteria/abnormal_lab/README.md b/src/MEDS_DEV/tasks/criteria/abnormal_lab/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/abnormal_lab/README.md rename to src/MEDS_DEV/tasks/criteria/abnormal_lab/README.md diff --git a/src/MEDS-DEV/tasks/criteria/abnormal_lab/reduced_echo_derived_LVEF_via_ECG.yaml b/src/MEDS_DEV/tasks/criteria/abnormal_lab/reduced_echo_derived_LVEF_via_ECG.yaml similarity index 100% rename from src/MEDS-DEV/tasks/criteria/abnormal_lab/reduced_echo_derived_LVEF_via_ECG.yaml rename to src/MEDS_DEV/tasks/criteria/abnormal_lab/reduced_echo_derived_LVEF_via_ECG.yaml diff --git a/src/MEDS-DEV/tasks/criteria/disease_progression/README.md 
b/src/MEDS_DEV/tasks/criteria/disease_progression/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/disease_progression/README.md rename to src/MEDS_DEV/tasks/criteria/disease_progression/README.md diff --git a/src/MEDS-DEV/tasks/criteria/disease_progression/diabetes/README.md b/src/MEDS_DEV/tasks/criteria/disease_progression/diabetes/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/disease_progression/diabetes/README.md rename to src/MEDS_DEV/tasks/criteria/disease_progression/diabetes/README.md diff --git a/src/MEDS-DEV/tasks/criteria/disease_progression/diabetes/ckd_post_kidney_labs.yaml b/src/MEDS_DEV/tasks/criteria/disease_progression/diabetes/ckd_post_kidney_labs.yaml similarity index 100% rename from src/MEDS-DEV/tasks/criteria/disease_progression/diabetes/ckd_post_kidney_labs.yaml rename to src/MEDS_DEV/tasks/criteria/disease_progression/diabetes/ckd_post_kidney_labs.yaml diff --git a/src/MEDS-DEV/tasks/criteria/mortality/README.md b/src/MEDS_DEV/tasks/criteria/mortality/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/mortality/README.md rename to src/MEDS_DEV/tasks/criteria/mortality/README.md diff --git a/src/MEDS-DEV/tasks/criteria/mortality/in_icu/README.md b/src/MEDS_DEV/tasks/criteria/mortality/in_icu/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/mortality/in_icu/README.md rename to src/MEDS_DEV/tasks/criteria/mortality/in_icu/README.md diff --git a/src/MEDS-DEV/tasks/criteria/mortality/in_icu/first_24h.yaml b/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml similarity index 100% rename from src/MEDS-DEV/tasks/criteria/mortality/in_icu/first_24h.yaml rename to src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml diff --git a/src/MEDS-DEV/tasks/criteria/phenotyping/README.md b/src/MEDS_DEV/tasks/criteria/phenotyping/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/phenotyping/README.md rename to 
src/MEDS_DEV/tasks/criteria/phenotyping/README.md diff --git a/src/MEDS-DEV/tasks/criteria/phenotyping/outpatient/MI/1y-5y.yaml b/src/MEDS_DEV/tasks/criteria/phenotyping/outpatient/MI/1y-5y.yaml similarity index 100% rename from src/MEDS-DEV/tasks/criteria/phenotyping/outpatient/MI/1y-5y.yaml rename to src/MEDS_DEV/tasks/criteria/phenotyping/outpatient/MI/1y-5y.yaml diff --git a/src/MEDS-DEV/tasks/criteria/phenotyping/outpatient/MI/README.md b/src/MEDS_DEV/tasks/criteria/phenotyping/outpatient/MI/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/phenotyping/outpatient/MI/README.md rename to src/MEDS_DEV/tasks/criteria/phenotyping/outpatient/MI/README.md diff --git a/src/MEDS-DEV/tasks/criteria/phenotyping/outpatient/README.md b/src/MEDS_DEV/tasks/criteria/phenotyping/outpatient/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/phenotyping/outpatient/README.md rename to src/MEDS_DEV/tasks/criteria/phenotyping/outpatient/README.md diff --git a/src/MEDS-DEV/tasks/criteria/readmission/README.md b/src/MEDS_DEV/tasks/criteria/readmission/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/readmission/README.md rename to src/MEDS_DEV/tasks/criteria/readmission/README.md diff --git a/src/MEDS-DEV/tasks/criteria/readmission/general_hospital/30d.yaml b/src/MEDS_DEV/tasks/criteria/readmission/general_hospital/30d.yaml similarity index 100% rename from src/MEDS-DEV/tasks/criteria/readmission/general_hospital/30d.yaml rename to src/MEDS_DEV/tasks/criteria/readmission/general_hospital/30d.yaml diff --git a/src/MEDS-DEV/tasks/criteria/readmission/general_hospital/README.md b/src/MEDS_DEV/tasks/criteria/readmission/general_hospital/README.md similarity index 100% rename from src/MEDS-DEV/tasks/criteria/readmission/general_hospital/README.md rename to src/MEDS_DEV/tasks/criteria/readmission/general_hospital/README.md From ea55a9aa0fbfb69d42ef1115763e6a5fa0b83b6c Mon Sep 17 00:00:00 2001 From: Matthew McDermott Date: 
Sun, 1 Sep 2024 14:27:52 -0400 Subject: [PATCH 2/5] Correcting typo --- src/MEDS_DEV/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MEDS_DEV/__init__.py b/src/MEDS_DEV/__init__.py index 9a48f46..acc77c9 100644 --- a/src/MEDS_DEV/__init__.py +++ b/src/MEDS_DEV/__init__.py @@ -1,6 +1,6 @@ from importlib.metadata import PackageNotFoundError, version -__package_name__ = "MEDS-DEV" +__package_name__ = "MEDS_DEV" try: __version__ = version(__package_name__) except PackageNotFoundError: From 9ced01555b20713e4d7c4990177f57962a388a85 Mon Sep 17 00:00:00 2001 From: Matthew McDermott Date: Sun, 1 Sep 2024 14:59:00 -0400 Subject: [PATCH 3/5] Modified extract a bit --- src/MEDS_DEV/configs/__init__.py | 0 src/MEDS_DEV/helpers/__init__.py | 0 src/MEDS_DEV/helpers/extract_task.sh | 16 ++++++++++++++++ 3 files changed, 16 insertions(+) create mode 100644 src/MEDS_DEV/configs/__init__.py create mode 100644 src/MEDS_DEV/helpers/__init__.py create mode 100755 src/MEDS_DEV/helpers/extract_task.sh diff --git a/src/MEDS_DEV/configs/__init__.py b/src/MEDS_DEV/configs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/MEDS_DEV/helpers/__init__.py b/src/MEDS_DEV/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/MEDS_DEV/helpers/extract_task.sh b/src/MEDS_DEV/helpers/extract_task.sh new file mode 100755 index 0000000..1884d17 --- /dev/null +++ b/src/MEDS_DEV/helpers/extract_task.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +export MEDS_ROOT_DIR=$1 +export MEDS_DATASET_NAME=$2 +export MEDS_TASK_NAME=$3 + +shift 3 + +MEDS_DEV_REPO_DIR=$(python -c "from importlib.resources import files; print(files(\"MEDS_DEV\"))") +export MEDS_DEV_REPO_DIR + +DATA_DIR="$MEDS_ROOT_DIR"/"$MEDS_DATASET_NAME"/data +SHARDS=$(expand_shards "$DATA_DIR") + +aces-cli --config-name="_ACES_MD" "hydra.searchpath=[pkg://MEDS_DEV.configs,pkg://aces.configs]" \ + "data.shard=$SHARDS" -m "$@" From 72cf13563c9cfa832fb781d5443c4737a4e2908d Mon Sep 17 
00:00:00 2001 From: Matthew McDermott Date: Sun, 1 Sep 2024 15:14:33 -0400 Subject: [PATCH 4/5] Basic task extraction workflow viable --- src/MEDS_DEV/configs/_ACES_MD.yaml | 2 +- .../datasets/MIMIC-IV/predicates.yaml | 29 ++++++++++--------- src/MEDS_DEV/helpers/extract_task.sh | 7 ++--- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/MEDS_DEV/configs/_ACES_MD.yaml b/src/MEDS_DEV/configs/_ACES_MD.yaml index 7302772..ab4dc8d 100644 --- a/src/MEDS_DEV/configs/_ACES_MD.yaml +++ b/src/MEDS_DEV/configs/_ACES_MD.yaml @@ -1,6 +1,6 @@ defaults: - _aces - - data: sharded + - override data: sharded - _self_ dataset_name: ${oc.env:MEDS_DATASET_NAME} diff --git a/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml b/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml index 7ef0769..cc81b04 100644 --- a/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml +++ b/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml @@ -1,17 +1,18 @@ -hospital_admission: - code: { regex: "^HOSPITAL_ADMISSION//.*" } -hospital_discharge: - code: { regex: "^HOSPITAL_DISCHARGE//.*" } +predicates: + hospital_admission: + code: { regex: "^HOSPITAL_ADMISSION//.*" } + hospital_discharge: + code: { regex: "^HOSPITAL_DISCHARGE//.*" } -ED_registration: - code: { regex: "^ED_REGISTRATION//.*" } -ED_discharge: - code: { regex: "^ED_OUT//.*" } + ED_registration: + code: { regex: "^ED_REGISTRATION//.*" } + ED_discharge: + code: { regex: "^ED_OUT//.*" } -icu_admission: - code: { regex: "^ICU_ADMISSION//.*" } -icu_discharge: - code: { regex: "^ICU_DISCHARGE//.*" } + icu_admission: + code: { regex: "^ICU_ADMISSION//.*" } + icu_discharge: + code: { regex: "^ICU_DISCHARGE//.*" } -death: - code: MEDS_DEATH + death: + code: MEDS_DEATH diff --git a/src/MEDS_DEV/helpers/extract_task.sh b/src/MEDS_DEV/helpers/extract_task.sh index 1884d17..be1ff9c 100755 --- a/src/MEDS_DEV/helpers/extract_task.sh +++ b/src/MEDS_DEV/helpers/extract_task.sh @@ -9,8 +9,7 @@ shift 3 MEDS_DEV_REPO_DIR=$(python -c "from importlib.resources 
import files; print(files(\"MEDS_DEV\"))") export MEDS_DEV_REPO_DIR -DATA_DIR="$MEDS_ROOT_DIR"/"$MEDS_DATASET_NAME"/data -SHARDS=$(expand_shards "$DATA_DIR") +SHARDS=$(expand_shards "$MEDS_ROOT_DIR"/data) -aces-cli --config-name="_ACES_MD" "hydra.searchpath=[pkg://MEDS_DEV.configs,pkg://aces.configs]" \ - "data.shard=$SHARDS" -m "$@" +aces-cli --config-path="$MEDS_DEV_REPO_DIR"/configs --config-name="_ACES_MD" \ + "hydra.searchpath=[pkg://aces.configs]" "data.shard=$SHARDS" -m "$@" From 77e8af54ba343aaf5dabc226fcb3e588adcac41f Mon Sep 17 00:00:00 2001 From: Matthew McDermott Date: Sun, 1 Sep 2024 15:19:14 -0400 Subject: [PATCH 5/5] Adding documentation. --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 5f6d098..c7460b6 100644 --- a/README.md +++ b/README.md @@ -25,3 +25,27 @@ TODO ### To Add Results TODO + +## Helpers + +### To extract a task + +First, clone the repo and install it locally with `pip install .`. Then, make sure you have the desired task +criteria and dataset predicates YAML files in their respective locations in the repo. + +Finally, run the following: + +```bash +./src/MEDS_DEV/helpers/extract_task.sh $MEDS_ROOT_DIR $DATASET_NAME $TASK_NAME +``` + +E.g., + +```bash +./src/MEDS_DEV/helpers/extract_task.sh ../MEDS_TAB_COMPL_TEST/MIMIC-IV/ MIMIC-IV mortality/in_icu/first_24h +``` + +which will use the `datasets/MIMIC-IV/predicates.yaml` predicates file, the +`tasks/criteria/mortality/in_icu/first_24h.yaml` task criteria, and will run over the dataset in the root +directory at `../MEDS_TAB_COMPL_TEST/MIMIC-IV`, reading data from the `data` subdir of that root dir and +writing labels to the `task_labels` subdir of that root dir, in a task-name-dependent manner.