From 91d30eeb96bc8fdd76a873e53599ce96215e87c1 Mon Sep 17 00:00:00 2001
From: Nassim Oufattole <noufattole@gmail.com>
Date: Sat, 14 Dec 2024 17:14:23 -0800
Subject: [PATCH] fixed e2e meds transform bug with eicu where the extraction
 config still had the infusionDrug table. Added ACES and meds-dev label
 extraction

---
 .gitignore                         |   2 +
 demo/aces.ipynb                    | 662 +++++++++++++++++++++++------
 demo/configs/extract_MIMIC.yaml    |   8 +-
 demo/extract_meds_data.ipynb       |   3 +-
 demo/meds_tab.ipynb                |  77 ----
 pyproject.toml                     |   2 +-
 src/MEDS_DEV/demo/meds_cehrbert.py | 397 -----------------
 src/MEDS_DEV/demo/meds_tab.ipynb   | 485 ---------------------
 src/MEDS_DEV/demo/meds_tab.py      | 240 -----------
 9 files changed, 551 insertions(+), 1325 deletions(-)
 delete mode 100644 src/MEDS_DEV/demo/meds_cehrbert.py
 delete mode 100644 src/MEDS_DEV/demo/meds_tab.ipynb
 delete mode 100644 src/MEDS_DEV/demo/meds_tab.py

diff --git a/.gitignore b/.gitignore
index c2b1661..3a41f4d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -361,3 +361,5 @@ $RECYCLE.BIN/
 meds_env/*
 src/MEDS_DEV/demo/download/*
 src/MEDS_DEV/demo/content/*
+
+demo/work_dir
\ No newline at end of file
diff --git a/demo/aces.ipynb b/demo/aces.ipynb
index 8d03cbf..7b31586 100644
--- a/demo/aces.ipynb
+++ b/demo/aces.ipynb
@@ -30,20 +30,48 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\n"
+     ]
+    }
+   ],
    "source": [
-    "ROOT_DIR = \"\""
+    "#@title Select the demo working directory\n",
+    "import tempfile\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "notebook_dir = os.getcwd()\n",
+    "\n",
+    "# Choose MIMICIV or eicu\n",
+    "ROOT_DIR=f\"{notebook_dir}/work_dir/mimiciv_demo/\"\n",
+    "# ROOT_DIR=f\"{notebook_dir}/work_dir/eicu_demo/\"\n",
+    "Path(ROOT_DIR).mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "!echo {ROOT_DIR}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 60,
    "metadata": {
     "id": "H6fqe217XDhi"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "TASK_DIR\n",
+      "mkdir: -p: File exists\n"
+     ]
+    }
+   ],
    "source": [
     "# From the ACES documentation\n",
     "\n",
@@ -88,16 +116,19 @@
     "    end_inclusive: True\n",
     "    label: discharge_or_death\n",
     "\"\"\"\n",
-    "!mkdir /content/tasks/ -p\n",
-    "TASK_NAME = \"in_hospital_3d_los_after_48h\"\n",
-    "TASK_CONFIG_FP = f\"/content/tasks/{TASK_NAME}.yaml\"\n",
+    "MEDS_DIR = ROOT_DIR + \"/meds\"\n",
+    "TASK_DIR = MEDS_DIR + \"/task_labels\"\n",
+    "!echo {TASK_DIR}\n",
+    "TASK_NAME = \"los_in_hospital_first_48h\"\n",
+    "TASK_CONFIG_FP = f\"{TASK_DIR}/{TASK_NAME}.yaml\"\n",
+    "!mkdir -p {TASK_DIR}/{TASK_NAME}\n",
     "with open(TASK_CONFIG_FP, 'w') as f:\n",
     "    f.write(task_config)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 61,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -105,14 +136,178 @@
     "id": "bXLiJGEry-Gb",
     "outputId": "7d954ab4-cf5c-4d02-a99c-669b5822bf44"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2024-12-14 17:02:13,334][HYDRA] Launching 3 jobs locally\n",
+      "[2024-12-14 17:02:13,334][HYDRA] \t#0 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=held_out/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n",
+      "\u001b[32m2024-12-14 17:02:13.542\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.547\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n",
+      "standard: meds\n",
+      "ts_format: '%m/%d/%Y %H:%M'\n",
+      "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n",
+      "shard: held_out/0\n",
+      "path: ${data.root}/${data.shard}.parquet\n",
+      "_prefix: /${data.shard}\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.547\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.561\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.566\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.567\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.568\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.568\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.575\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n",
+      "trigger\n",
+      "┣━━ input.end\n",
+      "┃   ┣━━ input.start\n",
+      "┃   ┗━━ gap.end\n",
+      "┗━━ target.end\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.579\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.580\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4,155 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.580\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.593\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.622\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.630\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.630\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.634\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 16 valid rows returned corresponding to 10 subjects.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.645\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n",
+      "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n",
+      "Columns:\n",
+      "  - trigger\n",
+      "  - input.end_summary\n",
+      "  - input.start_summary\n",
+      "  - gap.end_summary\n",
+      "  - target.end_summary\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.776\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.233536. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/held_out/0.parquet'.\u001b[0m\n",
+      "[2024-12-14 17:02:13,777][HYDRA] \t#1 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=train/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n",
+      "\u001b[32m2024-12-14 17:02:13.842\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.846\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n",
+      "standard: meds\n",
+      "ts_format: '%m/%d/%Y %H:%M'\n",
+      "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n",
+      "shard: train/0\n",
+      "path: ${data.root}/${data.shard}.parquet\n",
+      "_prefix: /${data.shard}\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.847\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.891\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.899\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.907\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.910\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.910\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.928\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n",
+      "trigger\n",
+      "┣━━ input.end\n",
+      "┃   ┣━━ input.start\n",
+      "┃   ┗━━ gap.end\n",
+      "┗━━ target.end\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 72,649 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.931\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:13.972\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.059\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.144\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.144\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 26 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.148\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 212 valid rows returned corresponding to 75 subjects.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.240\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n",
+      "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n",
+      "Columns:\n",
+      "  - trigger\n",
+      "  - input.end_summary\n",
+      "  - input.start_summary\n",
+      "  - gap.end_summary\n",
+      "  - target.end_summary\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.243\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.399837. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/train/0.parquet'.\u001b[0m\n",
+      "[2024-12-14 17:02:14,243][HYDRA] \t#2 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=tuning/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n",
+      "\u001b[32m2024-12-14 17:02:14.309\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.313\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n",
+      "standard: meds\n",
+      "ts_format: '%m/%d/%Y %H:%M'\n",
+      "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n",
+      "shard: tuning/0\n",
+      "path: ${data.root}/${data.shard}.parquet\n",
+      "_prefix: /${data.shard}\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.313\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.315\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.316\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.317\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.317\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.318\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n",
+      "trigger\n",
+      "┣━━ input.end\n",
+      "┃   ┣━━ input.start\n",
+      "┃   ┗━━ gap.end\n",
+      "┗━━ target.end\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6,240 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.331\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.342\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.349\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.349\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.352\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 15 valid rows returned corresponding to 9 subjects.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n",
+      "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n",
+      "Columns:\n",
+      "  - trigger\n",
+      "  - input.end_summary\n",
+      "  - input.start_summary\n",
+      "  - gap.end_summary\n",
+      "  - target.end_summary\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:14.364\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.054431. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/tuning/0.parquet'.\u001b[0m\n"
+     ]
+    }
+   ],
    "source": [
-    "!aces-cli --multirun data=sharded data.standard=meds data.root=\"$MIMICIV_MEDS_DIR/data\" \"data.shard=$(expand_shards  /content/meds/data/)\" cohort_dir=\" /content/tasks\" cohort_name=\"$TASK_NAME\" config_path=\"$TASK_CONFIG_FP\""
+    "!aces-cli --multirun data=sharded data.standard=meds data.root={MEDS_DIR}/data data.shard=$(expand_shards  {MEDS_DIR}/data/) cohort_dir={TASK_DIR} cohort_name={TASK_NAME} config_path={TASK_CONFIG_FP}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 62,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -121,145 +316,364 @@
     "id": "7Vvac7DIWyRT",
     "outputId": "40493f0e-48ba-4f5e-9d9a-401e26f1a9b7"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train prevalence: 0.231\n",
+      "tuning prevalence: 0.133\n",
+      "held_out prevalence: 0.25\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><style>\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
+       "  text-align: right;\n",
+       "  white-space: pre-wrap;\n",
+       "}\n",
+       "</style>\n",
+       "<small>shape: (243, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>subject_id</th><th>prediction_time</th><th>boolean_value</th><th>integer_value</th><th>float_value</th><th>categorical_value</th></tr><tr><td>i64</td><td>datetime[μs]</td><td>bool</td><td>i64</td><td>f64</td><td>str</td></tr></thead><tbody><tr><td>10012853</td><td>2175-04-07 15:36:00</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10012853</td><td>2176-11-27 21:28:00</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10014729</td><td>2125-03-01 07:15:00</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10014729</td><td>2125-03-21 16:58:00</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10016742</td><td>2178-07-05 21:13:00</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>10039997</td><td>2135-11-09 02:42:00</td><td>true</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10040025</td><td>2143-03-20 12:34:00</td><td>true</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10040025</td><td>2145-07-05 23:46:00</td><td>true</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10020740</td><td>2150-09-17 14:09:00</td><td>true</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10020740</td><td>2151-01-17 15:25:00</td><td>true</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
+      ],
+      "text/plain": [
+       "shape: (243, 6)\n",
+       "┌────────────┬───────────────────┬───────────────┬───────────────┬─────────────┬───────────────────┐\n",
+       "│ subject_id ┆ prediction_time   ┆ boolean_value ┆ integer_value ┆ float_value ┆ categorical_value │\n",
+       "│ ---        ┆ ---               ┆ ---           ┆ ---           ┆ ---         ┆ ---               │\n",
+       "│ i64        ┆ datetime[μs]      ┆ bool          ┆ i64           ┆ f64         ┆ str               │\n",
+       "╞════════════╪═══════════════════╪═══════════════╪═══════════════╪═════════════╪═══════════════════╡\n",
+       "│ 10012853   ┆ 2175-04-07        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 15:36:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10012853   ┆ 2176-11-27        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 21:28:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10014729   ┆ 2125-03-01        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 07:15:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10014729   ┆ 2125-03-21        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 16:58:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10016742   ┆ 2178-07-05        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 21:13:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ …          ┆ …                 ┆ …             ┆ …             ┆ …           ┆ …                 │\n",
+       "│ 10039997   ┆ 2135-11-09        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 02:42:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10040025   ┆ 2143-03-20        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 12:34:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10040025   ┆ 2145-07-05        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 23:46:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10020740   ┆ 2150-09-17        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 14:09:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10020740   ┆ 2151-01-17        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 15:25:00          ┆               ┆               ┆             ┆                   │\n",
+       "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "import polars as pl\n",
     "\n",
     "# execute query and get results\n",
-    "df = pl.read_parquet(f\"/content/tasks/{TASK_NAME}/**/*.parquet\")\n",
+    "df = pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/**/*.parquet\")\n",
     "\n",
-    "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n",
-    "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n",
-    "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n",
+    "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n",
+    "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n",
+    "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n",
     "\n",
     "\n",
     "df.sort('boolean_value')"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 1000
-    },
-    "id": "XWB7O1UGhRIo",
-    "outputId": "e3416d5e-7427-4cf4-c0ab-20053a9d3430"
-   },
-   "outputs": [],
-   "source": [
-    "#@title Install meds-tab\n",
-    "\n",
-    "!pip uninstall es-aces -y\n",
-    "!pip install meds-tab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "SeGawIqli0nn"
-   },
-   "outputs": [],
-   "source": [
-    "MIMICIV_MEDS_DIR = \"/content/meds/\"\n",
-    "OUTPUT_TABULARIZATION_DIR=\"/content/tabularized/\"\n",
-    "TASK_DIR=\"/content/tasks/\"\n",
-    "TASK_NAME=\"in_hospital_3d_los_after_48h\"\n",
-    "OUTPUT_MODEL_DIR=\"/content/output/meds_tab/\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "Tud0_0cgjljP",
-    "outputId": "fb3417e0-3ba4-4f9a-ab95-ce3ba8731ca1"
-   },
-   "outputs": [],
-   "source": [
-    "!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "RVLBdOn1mnV5"
-   },
-   "outputs": [],
+   "cell_type": "markdown",
+   "metadata": {},
    "source": [
-    "# Define the window sizes and aggregations to generate features for\n",
-    "WINDOW_SIZES = \"tabularization.window_sizes=[1d,30d,365d]\"\n",
-    "AGGREGATIONS = \"tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\""
+    "### MEDS-DEV has many pre-defined tasks we can use"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "KhCPqBmduNYK"
-   },
-   "outputs": [],
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running task mortality/in_icu/first_24h on dataset MIMIC-IV with MEDS_ROOT_DIR=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds and SHARDS=held_out/0,train/0,tuning/0\n",
+      "[2024-12-14 17:02:21,042][HYDRA] Launching 3 jobs locally\n",
+      "[2024-12-14 17:02:21,042][HYDRA] \t#0 : data.shard=held_out/0\n",
+      "\u001b[32m2024-12-14 17:02:21.188\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.190\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.209\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n",
+      "standard: meds\n",
+      "ts_format: '%m/%d/%Y %H:%M'\n",
+      "root: ${oc.env:MEDS_ROOT_DIR}/data\n",
+      "shard: held_out/0\n",
+      "path: ${data.root}/${data.shard}.parquet\n",
+      "_prefix: /${data.shard}\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.209\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.223\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.229\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.230\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.231\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.231\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.237\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n",
+      "trigger\n",
+      "┣━━ input.end\n",
+      "┃   ┗━━ input.start\n",
+      "┗━━ gap.end\n",
+      "    ┗━━ target.end\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.242\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.242\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4,163 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.243\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.255\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.277\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.285\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.285\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.286\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.302\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 6 valid rows returned corresponding to 4 subjects.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.302\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m142\u001b[0m - \u001b[33m\u001b[1mAll labels in the extracted cohort are the same: '0'. This may indicate an issue with the task logic. Please double-check your configuration file if this is not expected.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.303\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.306\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n",
+      "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n",
+      "Columns:\n",
+      "  - trigger\n",
+      "  - input.end_summary\n",
+      "  - input.start_summary\n",
+      "  - gap.end_summary\n",
+      "  - target.end_summary\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.398\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.209584. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/held_out/0.parquet'.\u001b[0m\n",
+      "[2024-12-14 17:02:21,399][HYDRA] \t#1 : data.shard=train/0\n",
+      "\u001b[32m2024-12-14 17:02:21.469\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.470\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.486\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n",
+      "standard: meds\n",
+      "ts_format: '%m/%d/%Y %H:%M'\n",
+      "root: ${oc.env:MEDS_ROOT_DIR}/data\n",
+      "shard: train/0\n",
+      "path: ${data.root}/${data.shard}.parquet\n",
+      "_prefix: /${data.shard}\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.487\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.516\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.531\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.546\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.549\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.549\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.564\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.567\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n",
+      "trigger\n",
+      "┣━━ input.end\n",
+      "┃   ┗━━ input.start\n",
+      "┗━━ gap.end\n",
+      "    ┗━━ target.end\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 72,774 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.569\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.622\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.680\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.714\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.715\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 53 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.715\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.804\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 60 valid rows returned corresponding to 47 subjects.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.804\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.805\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.806\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n",
+      "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n",
+      "Columns:\n",
+      "  - trigger\n",
+      "  - input.end_summary\n",
+      "  - input.start_summary\n",
+      "  - gap.end_summary\n",
+      "  - target.end_summary\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.810\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.340355. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/train/0.parquet'.\u001b[0m\n",
+      "[2024-12-14 17:02:21,810][HYDRA] \t#2 : data.shard=tuning/0\n",
+      "\u001b[32m2024-12-14 17:02:21.878\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.879\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.894\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n",
+      "standard: meds\n",
+      "ts_format: '%m/%d/%Y %H:%M'\n",
+      "root: ${oc.env:MEDS_ROOT_DIR}/data\n",
+      "shard: tuning/0\n",
+      "path: ${data.root}/${data.shard}.parquet\n",
+      "_prefix: /${data.shard}\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.896\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.897\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.899\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.903\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.903\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n",
+      "trigger\n",
+      "┣━━ input.end\n",
+      "┃   ┗━━ input.start\n",
+      "┗━━ gap.end\n",
+      "    ┗━━ target.end\n",
+      "\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6,242 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.911\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.924\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.931\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.931\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 7 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.932\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 8 valid rows returned corresponding to 5 subjects.\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.946\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n",
+      "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n",
+      "Columns:\n",
+      "  - trigger\n",
+      "  - input.end_summary\n",
+      "  - input.start_summary\n",
+      "  - gap.end_summary\n",
+      "  - target.end_summary\u001b[0m\n",
+      "\u001b[32m2024-12-14 17:02:21.950\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.071263. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/tuning/0.parquet'.\u001b[0m\n"
+     ]
+    }
+   ],
    "source": [
-    "!rm -rf /content/tabularized/tabularize/"
+    "TASK_NAME=\"mortality/in_icu/first_24h\"\n",
+    "!../src/MEDS_DEV/helpers/extract_task.sh {MEDS_DIR} \"MIMIC-IV\" {TASK_NAME}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "p_D07KzxjVUl",
-    "outputId": "8836b076-cf64-4f29-da81-ac5125ab7608"
-   },
-   "outputs": [],
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\n"
+     ]
+    }
+   ],
    "source": [
-    "!meds-tab-tabularize-static \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS"
+    "!echo \"{TASK_DIR}/{TASK_NAME}/**/*.parquet\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train prevalence: 0.133\n",
+      "tuning prevalence: 0.125\n",
+      "held_out prevalence: 0.0\n"
+     ]
     },
-    "id": "u-e-mV2Hk-Qf",
-    "outputId": "c292be12-ff74-44e4-f039-758e10ccc909"
-   },
-   "outputs": [],
+    {
+     "data": {
+      "text/html": [
+       "<div><style>\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
+       "  text-align: right;\n",
+       "  white-space: pre-wrap;\n",
+       "}\n",
+       "</style>\n",
+       "<small>shape: (74, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>subject_id</th><th>prediction_time</th><th>boolean_value</th><th>integer_value</th><th>float_value</th><th>categorical_value</th></tr><tr><td>i64</td><td>datetime[μs]</td><td>bool</td><td>i64</td><td>f64</td><td>str</td></tr></thead><tbody><tr><td>10012853</td><td>2176-11-27 02:34:49</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10014729</td><td>2125-02-28 10:03:08</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10016742</td><td>2178-07-04 22:45:00</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10016742</td><td>2178-07-14 08:16:00</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10016742</td><td>2178-07-23 08:19:00</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>10010471</td><td>2155-12-03 20:33:00</td><td>true</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10015931</td><td>2177-03-25 21:48:07</td><td>true</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10037861</td><td>2117-03-15 16:34:58</td><td>true</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10037975</td><td>2185-01-18 19:12:12</td><td>true</td><td>null</td><td>null</td><td>null</td></tr><tr><td>10038081</td><td>2115-10-10 10:15:25</td><td>true</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
+      ],
+      "text/plain": [
+       "shape: (74, 6)\n",
+       "┌────────────┬───────────────────┬───────────────┬───────────────┬─────────────┬───────────────────┐\n",
+       "│ subject_id ┆ prediction_time   ┆ boolean_value ┆ integer_value ┆ float_value ┆ categorical_value │\n",
+       "│ ---        ┆ ---               ┆ ---           ┆ ---           ┆ ---         ┆ ---               │\n",
+       "│ i64        ┆ datetime[μs]      ┆ bool          ┆ i64           ┆ f64         ┆ str               │\n",
+       "╞════════════╪═══════════════════╪═══════════════╪═══════════════╪═════════════╪═══════════════════╡\n",
+       "│ 10012853   ┆ 2176-11-27        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 02:34:49          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10014729   ┆ 2125-02-28        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 10:03:08          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10016742   ┆ 2178-07-04        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 22:45:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10016742   ┆ 2178-07-14        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 08:16:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10016742   ┆ 2178-07-23        ┆ false         ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 08:19:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ …          ┆ …                 ┆ …             ┆ …             ┆ …           ┆ …                 │\n",
+       "│ 10010471   ┆ 2155-12-03        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 20:33:00          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10015931   ┆ 2177-03-25        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 21:48:07          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10037861   ┆ 2117-03-15        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 16:34:58          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10037975   ┆ 2185-01-18        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 19:12:12          ┆               ┆               ┆             ┆                   │\n",
+       "│ 10038081   ┆ 2115-10-10        ┆ true          ┆ null          ┆ null        ┆ null              │\n",
+       "│            ┆ 10:15:25          ┆               ┆               ┆             ┆                   │\n",
+       "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "!meds-tab-tabularize-time-series --multirun \"worker=range(0,2)\" \"hydra/launcher=joblib\" \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "NmaR_-Fik4eH"
-   },
-   "outputs": [],
-   "source": [
-    "!meds-tab-cache-task \"input_dir={MIMICIV_MEDS_DIR}/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"input_label_dir=$TASK_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS"
+    "import polars as pl\n",
+    "\n",
+    "\n",
+    "# load the previously extracted label files for this task (all splits)\n",
+    "df = pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/**/*.parquet\")\n",
+    "\n",
+    "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n",
+    "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n",
+    "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n",
+    "\n",
+    "\n",
+    "df.sort('boolean_value')"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "id": "dLIkOzTblBB2"
-   },
+   "metadata": {},
    "outputs": [],
-   "source": [
-    "!meds-tab-xgboost --multirun \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False \"hydra.sweeper.n_trials=10\" $WINDOW_SIZES $AGGREGATIONS \"tabularization.min_code_inclusion_count=10\""
-   ]
+   "source": []
   }
  ],
  "metadata": {
@@ -267,11 +681,21 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "dev",
+   "language": "python",
    "name": "python3"
   },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
   }
  },
  "nbformat": 4,
diff --git a/demo/configs/extract_MIMIC.yaml b/demo/configs/extract_MIMIC.yaml
index 53577f5..4002089 100644
--- a/demo/configs/extract_MIMIC.yaml
+++ b/demo/configs/extract_MIMIC.yaml
@@ -27,10 +27,10 @@ stage_configs:
     infer_schema_length: 999999999
   split_and_shard_subjects:
     n_subjects_per_shard: 1000
-  split_fracs:
-    train: 0.5
-    tuning: 0.25
-    held_out: 0.25
+    split_fracs:
+      train: 0.5
+      tuning: 0.25
+      held_out: 0.25
 
 stages:
   - shard_events
diff --git a/demo/extract_meds_data.ipynb b/demo/extract_meds_data.ipynb
index 55a228f..f6c55b6 100644
--- a/demo/extract_meds_data.ipynb
+++ b/demo/extract_meds_data.ipynb
@@ -3619,7 +3619,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -3635,7 +3635,6 @@
     "import tempfile\n",
     "import os\n",
     "from pathlib import Path\n",
-    "temp_dir = tempfile.TemporaryDirectory()\n",
     "notebook_dir = os.getcwd()\n",
     "\n",
     "ROOT_DIR=f\"{notebook_dir}/work_dir/eicu_demo/\"\n",
diff --git a/demo/meds_tab.ipynb b/demo/meds_tab.ipynb
index cde4110..20de31f 100644
--- a/demo/meds_tab.ipynb
+++ b/demo/meds_tab.ipynb
@@ -9,83 +9,6 @@
     "# Using an example MEDS tool, ACES for labeling"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "collapsed": true,
-    "id": "z3_pG9YAWpKy",
-    "outputId": "efa4c286-413d-4a91-a53d-fb41769cd4f2"
-   },
-   "outputs": [],
-   "source": [
-    "#@title Install ACES\n",
-    "\n",
-    "\n",
-    "!pip install es-aces"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "H6fqe217XDhi"
-   },
-   "outputs": [],
-   "source": [
-    "# From the ACES documentation\n",
-    "\n",
-    "task_config = \"\"\"\n",
-    "description: >-\n",
-    "  This file specifies the base configuration for the prediction of a hospital los being greater than 3days,\n",
-    "  leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window\n",
-    "  and the target window. Patients who die or are discharged in the gap window are excluded. Note that this\n",
-    "  task is in-**hospital** los, not in-**ICU** los which is a different task.\n",
-    "\n",
-    "predicates:\n",
-    "  hospital_admission:\n",
-    "    code: {regex: \"HOSPITAL_ADMISSION//.*\"}\n",
-    "  hospital_discharge:\n",
-    "    code: {regex: \"HOSPITAL_DISCHARGE//.*\"}\n",
-    "  death:\n",
-    "    code: MEDS_DEATH\n",
-    "  discharge_or_death:\n",
-    "    expr: or(hospital_discharge, death)\n",
-    "\n",
-    "trigger: hospital_admission\n",
-    "\n",
-    "windows:\n",
-    "  input:\n",
-    "    start: NULL\n",
-    "    end: trigger + 48h\n",
-    "    start_inclusive: True\n",
-    "    end_inclusive: True\n",
-    "    index_timestamp: end\n",
-    "  gap:\n",
-    "    start: input.end\n",
-    "    end: start + 24h\n",
-    "    start_inclusive: False\n",
-    "    end_inclusive: True\n",
-    "    has:\n",
-    "      hospital_admission: (None, 0)\n",
-    "      discharge_or_death: (None, 0)\n",
-    "  target:\n",
-    "    start: trigger\n",
-    "    end: start + 3d\n",
-    "    start_inclusive: False\n",
-    "    end_inclusive: True\n",
-    "    label: discharge_or_death\n",
-    "\"\"\"\n",
-    "!mkdir /content/tasks/ -p\n",
-    "TASK_NAME = \"in_hospital_3d_los_after_48h\"\n",
-    "TASK_CONFIG_FP = f\"/content/tasks/{TASK_NAME}.yaml\"\n",
-    "with open(TASK_CONFIG_FP, 'w') as f:\n",
-    "    f.write(task_config)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/pyproject.toml b/pyproject.toml
index 649eef6..152d01d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,7 +28,7 @@ classifiers = [
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
 ]
-dependencies = ["meds==0.3.3", "es-aces==0.5.0"]
+dependencies = ["meds>=0.3.3", "es-aces>=0.5.0"]
 
 [tool.setuptools_scm]
 
diff --git a/src/MEDS_DEV/demo/meds_cehrbert.py b/src/MEDS_DEV/demo/meds_cehrbert.py
deleted file mode 100644
index de1c37f..0000000
--- a/src/MEDS_DEV/demo/meds_cehrbert.py
+++ /dev/null
@@ -1,397 +0,0 @@
-# ---
-# jupyter:
-#   jupytext:
-#     text_representation:
-#       extension: .py
-#       format_name: percent
-#       format_version: '1.3'
-#       jupytext_version: 1.16.4
-#   kernelspec:
-#     display_name: Python 3
-#     name: python3
-# ---
-
-# %% [Colab-only] Switch Colab to python 3.12
-# !sudo apt-get install python3.12 python3.12-venv
-# import sys
-# !python3.12 -m venv meds_env
-# import os
-# os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH']
-# !pip install --upgrade pip
-
-# # Then in a new code cell:
-# import sys
-# sys.executable = '/content/meds_env/bin/python'
-
-# # Confirm python version is 3.12
-# !python --version
-
-# %% [markdown]
-# ## Install dependencies
-
-# %%
-!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7
-
-# %% [markdown]
-# # Download MIMIC-IV demo
-
-# %%
-# macOS users should install wget (e.g. through brew)
-!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P ./content/download https://physionet.org/files/mimic-iv-demo/2.2/
-
-# %%
-# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config
-!mkdir -p ./content/meds-transform/
-!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git ./content/tmp/
-!mv ./content/tmp/MIMIC-IV_Example ./content/MIMIC-IV_Example
-
-# %%
-# Download MIMIC IV metadata
-MIMICIV_RAW_DIR = "https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map"
-MIMICIV_PRE_MEDS_DIR = "./content/pre_meds/"
-!mkdir {MIMICIV_PRE_MEDS_DIR}
-
-OUTPUT_DIR = "./content/download/mimic-iv-demo/2.2"
-
-files = [
-    'd_labitems_to_loinc.csv',
-    'inputevents_to_rxnorm.csv',
-    'lab_itemid_to_loinc.csv',
-    'meas_chartevents_main.csv',
-    'meas_chartevents_value.csv',
-    'numerics-summary.csv',
-    'outputevents_to_loinc.csv',
-    'proc_datetimeevents.csv',
-    'proc_itemid.csv',
-    'waveforms-summary.csv'
-]
-
-for file in files:
-    !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file}
-    !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file}
-
-# %%
-# Convert to MEDS
-CURRENT_DIR = !pwd
-CURRENT_DIR = CURRENT_DIR[0]
-
-# %%
-# Convert to MEDS
-TUTORIAL_DIR = CURRENT_DIR + "/content/MIMIC-IV_Example"
-MIMICIV_RAW_DIR = CURRENT_DIR + "/content/download/mimic-iv-demo/2.2"
-MIMICIV_PRE_MEDS_DIR = CURRENT_DIR + "/content/pre_meds"
-MIMICIV_MEDS_DIR = CURRENT_DIR + "/content/meds"
-
-EVENT_CONVERSION_CONFIG_FP = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/event_config.yaml"
-PIPELINE_CONFIG_PATH = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/pipeline_config.yaml"
-!cd {TUTORIAL_DIR} && ./run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true
-
-# %% [markdown]
-# # Examine MEDS data
-
-# %%
-import polars as pl
-
-data = pl.read_parquet('./content/meds/data/**/*.parquet')
-data[['subject_id', 'time', 'code', 'numeric_value']]
-
-# %% [markdown]
-# # A simple Polars analysis
-
-# %%
-icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//'))
-icd10_events.group_by('code').count().sort('count', descending=True)
-
-# %%
-df = pl.read_parquet("./content/meds/metadata/codes.parquet")
-df
-
-# %% [markdown]
-# ## Using an example MEDS tool, ACES for labeling
-
-# %% [markdown]
-# ## Install ACES
-
-# %%
-!pip install es-aces
-
-# %%
-
-# From ACES documentation
-task_config = """
-description: >-
-  This file specifies the base configuration for the prediction of a hospital los being greater than 3days,
-  leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window
-  and the target window. Patients who die or are discharged in the gap window are excluded. Note that this
-  task is in-**hospital** los, not in-**ICU** los which is a different task.
-
-predicates:
-  hospital_admission:
-    code: {regex: "HOSPITAL_ADMISSION//.*"}
-  hospital_discharge:
-    code: {regex: "HOSPITAL_DISCHARGE//.*"}
-  death:
-    code: MEDS_DEATH
-  discharge_or_death:
-    expr: or(hospital_discharge, death)
-
-trigger: hospital_admission
-
-windows:
-  input:
-    start: NULL
-    end: trigger + 48h
-    start_inclusive: True
-    end_inclusive: True
-    index_timestamp: end
-  gap:
-    start: input.end
-    end: start + 24h
-    start_inclusive: False
-    end_inclusive: True
-    has:
-      hospital_admission: (None, 0)
-      discharge_or_death: (None, 0)
-  target:
-    start: trigger
-    end: start + 3d
-    start_inclusive: False
-    end_inclusive: True
-    label: discharge_or_death
-"""
-
-!mkdir ./content/tasks/ -p
-TASK_NAME = "in_hospital_3d_los_after_48h"
-TASK_CONFIG_FP = f"./content/tasks/{TASK_NAME}.yaml"
-with open(TASK_CONFIG_FP, 'w') as f:
-    f.write(task_config)
-
-
-# %%
-!pip install es-aces
-
-# %%
-!echo $TASK_NAME
-!echo $TASK_CONFIG_FP
-
-# %%
-!aces-cli --multirun data=sharded data.standard=meds data.root="$MIMICIV_MEDS_DIR/data" "data.shard=$(expand_shards  ./content/meds/data/)" cohort_dir=" ./content/tasks" cohort_name="$TASK_NAME" config_path="$TASK_CONFIG_FP"
-
-# %%
-# TODO: reimporting polars due to dependencies?
-import polars as pl
-
-# Execute query and get results
-df = pl.read_parquet(f"./content/tasks/{TASK_NAME}/**/*.parquet")
-
-print("train prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/train/*.parquet")['boolean_value'].mean(), 3)))
-print("tuning prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/tuning/*.parquet")['boolean_value'].mean(), 3)))
-print("held_out prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/held_out/*.parquet")['boolean_value'].mean(), 3)))
-
-
-df.sort('boolean_value')
-
-# %% [markdown]
-# ## Switch Colab to python 3.11 for cehrbert
-# %%
-# %%capture
-# !sudo apt-get install python3.11 python3.11-venv
-# import sys
-# !python3.11 -m venv cehrbert
-# import os
-# os.environ['PATH'] = './content/cehrbert/bin:' + os.environ['PATH']
-# !pip install --upgrade pip
-
-# %%
-# import sys
-# sys.executable = './content/cehrbert/bin/python'
-
-# %% [markdown]
-# ## Install cehrbert and its dependencies
-
-# %%
-!pip install meds_reader==0.1.9
-!pip install setuptools
-!pip install cehrbert==1.3.1
-
-# %%
-MIMICIV_MEDS_DIR = "./content/meds/"
-MIMICIV_MEDS_READER_DIR = "./content/meds_reader/"
-TASK_DIR="./content/tasks/"
-TASK_NAME="in_hospital_3d_los_after_48h"
-OUTPUT_PRETRAIN_MODEL_DIR="./content/output/cehrbert/"
-# TODO this variable has an identical name?
-OUTPUT_PRETRAIN_MODEL_DIR="./content/output/cehrbert_finetuned/"
-
-# %% [markdown]
-# Run meds_reader on the MEDS data
-
-# %%
-!meds_reader_convert $MIMICIV_MEDS_DIR $MIMICIV_MEDS_READER_DIR
-
-# %%
-!mkdir -p ./content/output/cehrbert/
-!mkdir -p ./content/output/cehrbert_dataset_prepared/
-!mkdir -p ./content/output/cehrbert_finetuned/
-
-# %%
-# !mkdir ./content/github_repo;cd ./content/github_repo;git clone https://github.com/cumc-dbmi/cehrbert.git;cd cehrbert;git checkout fix/meds_evaluation;pip install .;
-
-# %% [markdown]
-# Create the cehrbert pretraining configuration yaml file
-
-# %%
-cehrbert_pretrain_config = """
-#Model arguments
-model_name_or_path: "./content/output/cehrbert/"
-tokenizer_name_or_path: "./content/output/cehrbert/"
-num_hidden_layers: 6
-max_position_embeddings: 1024
-hidden_size: 768
-vocab_size: 100000
-min_frequency: 50
-include_value_prediction: false # additional CEHR-BERT learning objective
-
-#Data arguments
-data_folder: "./content/meds_reader/"
-dataset_prepared_path: "./content/output/cehrbert_dataset_prepared/"
-
-# Below is a list of Med-to-CehrBert related arguments
-preprocessing_num_workers: 2
-preprocessing_batch_size: 128
-# if is_data_in_med is false, it assumes the data is in the cehrbert format
-is_data_in_meds: true
-att_function_type: "cehr_bert"
-inpatient_att_function_type: "mix"
-include_auxiliary_token: true
-include_demographic_prompt: false
-# if the data is in the meds format, the validation split will be omitted
-# as the meds already provide train/tuning/held_out splits
-validation_split_percentage: 0.05
-
-# Huggingface Arguments
-dataloader_num_workers: 2
-dataloader_prefetch_factor: 2
-
-overwrite_output_dir: false
-resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder
-seed: 42
-
-output_dir: "./content/output/cehrbert/"
-evaluation_strategy: "epoch"
-save_strategy: "epoch"
-eval_accumulation_steps: 10
-
-learning_rate: 0.00005
-per_device_train_batch_size: 8
-per_device_eval_batch_size: 8
-gradient_accumulation_steps: 2
-
-num_train_epochs: 50 # for large datasets, 5-10 epochs should suffice
-warmup_steps: 10
-weight_decay: 0.01
-logging_dir: "./logs"
-logging_steps: 10
-
-save_total_limit:
-load_best_model_at_end: true
-metric_for_best_model: "eval_loss"
-greater_is_better: false
-
-report_to: "none"
-"""
-PRETRAIN_CONFIG_FP = f"./content/output/cehrbert/cehrbert_pretrain_config.yaml"
-with open(PRETRAIN_CONFIG_FP, 'w') as f:
-    f.write(cehrbert_pretrain_config)
-
-# %% [markdown]
-# ## Pretrain cehrbert using MLM
-!python3.11 -m cehrbert.runners.hf_cehrbert_pretrain_runner ./content/output/cehrbert/cehrbert_pretrain_config.yaml
-
-# %% [markdown]
-# ## Create the cehrbert finetuning configuration yaml file
-cehrbert_finetune_config = f"""
-#Model arguments
-model_name_or_path: "./content/output/cehrbert/"
-tokenizer_name_or_path: "./content/output/cehrbert/"
-num_hidden_layers: 6
-max_position_embeddings: 1024
-hidden_size: 768
-vocab_size: 100000
-min_frequency: 50
-include_value_prediction: false # additional CEHR-BERT learning objective
-
-#Data arguments
-cohort_folder: "./content/tasks/{TASK_NAME}/"
-data_folder: "./content/meds_reader/"
-dataset_prepared_path: "./content/output/cehrbert_dataset_prepared/"
-
-#LORA
-use_lora: True
-lora_rank: 64
-lora_alpha: 16
-target_modules: [ "query", "value" ]
-lora_dropout: 0.1
-
-# Below is a list of Med-to-CehrBert related arguments
-preprocessing_num_workers: 2
-preprocessing_batch_size: 128
-# if is_data_in_med is false, it assumes the data is in the cehrbert format
-is_data_in_meds: true
-att_function_type: "cehr_bert"
-inpatient_att_function_type: "mix"
-include_auxiliary_token: true
-include_demographic_prompt: false
-# if the data is in the meds format, the validation split will be omitted
-# as the meds already provide train/tuning/held_out splits
-validation_split_percentage: 0.05
-
-# Huggingface Arguments
-dataloader_num_workers: 2
-dataloader_prefetch_factor: 2
-
-overwrite_output_dir: false
-resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder
-seed: 42
-
-output_dir: "./content/output/cehrbert_finetuned"
-evaluation_strategy: "epoch"
-save_strategy: "epoch"
-eval_accumulation_steps: 10
-
-do_train: True
-do_predict: True
-
-learning_rate: 0.00005
-per_device_train_batch_size: 8
-per_device_eval_batch_size: 8
-gradient_accumulation_steps: 2
-
-num_train_epochs: 10
-warmup_steps: 10
-weight_decay: 0.01
-logging_dir: "./logs"
-logging_steps: 10
-
-save_total_limit:
-load_best_model_at_end: true
-metric_for_best_model: "eval_loss"
-greater_is_better: false
-
-report_to: "none"
-"""
-FINETUNE_CONFIG_FP = f"./content/output/cehrbert/cehrbert_finetune_config.yaml"
-with open(FINETUNE_CONFIG_FP, 'w') as f:
-    f.write(cehrbert_finetune_config)
-
-# %%
-# ## Finetune cehrbert for the downstream task
-!python3.11 -m cehrbert.runners.hf_cehrbert_finetune_runner ./content/output/cehrbert/cehrbert_finetune_config.yaml
-
-# %%
-import pandas as pd
-
-pd.read_parquet("./content/output/cehrbert_finetuned/test_predictions")
-
-# %%
-!cat ./content/output/cehrbert_finetuned/test_results.json
diff --git a/src/MEDS_DEV/demo/meds_tab.ipynb b/src/MEDS_DEV/demo/meds_tab.ipynb
deleted file mode 100644
index 86e5791..0000000
--- a/src/MEDS_DEV/demo/meds_tab.ipynb
+++ /dev/null
@@ -1,485 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "xLJYigczPaTG"
-   },
-   "outputs": [],
-   "source": [
-    "#@title Swtich Colab to python 3.12\n",
-    "%%capture\n",
-    "!sudo apt-get install python3.12 python3.12-venv\n",
-    "import sys\n",
-    "!python3.12 -m venv meds_env\n",
-    "import os\n",
-    "os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH']\n",
-    "!pip install --upgrade pip\n",
-    "\n",
-    "# Then in a new code cell:\n",
-    "import sys\n",
-    "sys.executable = '/content/meds_env/bin/python'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "w8Zrf0NePwfs",
-    "outputId": "6aa313bc-5141-453c-88f5-8c1d22956f3d"
-   },
-   "outputs": [],
-   "source": [
-    "# confirm python version is 3.12\n",
-    "!python --version"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "ikPVQZOnPcI0"
-   },
-   "outputs": [],
-   "source": [
-    "#@title Install dependencies\n",
-    "!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "id": "rjqK4CuRPfnE"
-   },
-   "outputs": [],
-   "source": [
-    "#@title Download MIMIC-IV demo\n",
-    "\n",
-    "!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P download https://physionet.org/files/mimic-iv-demo/2.2/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "qHOBI1_5StBb",
-    "outputId": "eb0ef7ec-54c8-4cac-b1ff-d176c986a447"
-   },
-   "outputs": [],
-   "source": [
-    "# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config\n",
-    "!mkdir /content/meds-transform/\n",
-    "!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git /content/tmp/\n",
-    "!mv /content/tmp/MIMIC-IV_Example /content/MIMIC-IV_Example"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "Sr2QdvNxpd0p",
-    "outputId": "7877300f-afc5-4583-95f2-e4f7089356b6"
-   },
-   "outputs": [],
-   "source": [
-    "# download MIMIC IV metadata\n",
-    "MIMICIV_RAW_DIR = \"https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map\"\n",
-    "MIMICIV_PRE_MEDS_DIR = \"/content/pre_meds/\"\n",
-    "!mkdir {MIMICIV_PRE_MEDS_DIR}\n",
-    "\n",
-    "OUTPUT_DIR = \"/content/download/mimic-iv-demo/2.2\"\n",
-    "\n",
-    "files = [\n",
-    "    'd_labitems_to_loinc.csv',\n",
-    "    'inputevents_to_rxnorm.csv',\n",
-    "    'lab_itemid_to_loinc.csv',\n",
-    "    'meas_chartevents_main.csv',\n",
-    "    'meas_chartevents_value.csv',\n",
-    "    'numerics-summary.csv',\n",
-    "    'outputevents_to_loinc.csv',\n",
-    "    'proc_datetimeevents.csv',\n",
-    "    'proc_itemid.csv',\n",
-    "    'waveforms-summary.csv'\n",
-    "]\n",
-    "\n",
-    "for file in files:\n",
-    "    !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file}\n",
-    "    !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "pQSLxYJhRPxm",
-    "outputId": "41ab56f5-512c-4489-adfc-614644c6c632"
-   },
-   "outputs": [],
-   "source": [
-    "# Convert to MEDS\n",
-    "TUTORIAL_DIR = \"/content/MIMIC-IV_Example\"\n",
-    "MIMICIV_RAW_DIR = \"/content/download/mimic-iv-demo/2.2\"\n",
-    "MIMICIV_PRE_MEDS_DIR = \"/content/pre_meds/\"\n",
-    "MIMICIV_MEDS_DIR = \"/content/meds/\"\n",
-    "\n",
-    "EVENT_CONVERSION_CONFIG_FP=\"/content/MIMIC-IV_Example/configs/event_config.yaml\"\n",
-    "PIPELINE_CONFIG_PATH=\"/content/MIMIC-IV_Example/configs/pipeline_config.yaml\"\n",
-    "!cd {TUTORIAL_DIR} && /content/MIMIC-IV_Example/run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 443
-    },
-    "id": "u2f6socuWhjd",
-    "outputId": "289bc4ae-e374-4ed1-fd98-58c803f14e26"
-   },
-   "outputs": [],
-   "source": [
-    "#@title Examine MEDS data\n",
-    "\n",
-    "import polars as pl\n",
-    "data = pl.read_parquet('/content/meds/data/**/*.parquet')\n",
-    "\n",
-    "data[['subject_id', 'time', 'code', 'numeric_value']]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 479
-    },
-    "id": "CZD9xpSxd1Wp",
-    "outputId": "ea758e42-b71d-464f-f931-df7eec7a4415"
-   },
-   "outputs": [],
-   "source": [
-    "#@title A Simple Polars Analysis\n",
-    "\n",
-    "icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//'))\n",
-    "\n",
-    "icd10_events.group_by('code').count().sort('count', descending=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 443
-    },
-    "id": "u7EXKCZelho-",
-    "outputId": "27e81b86-1195-4c6c-f7c7-993665b826d7"
-   },
-   "outputs": [],
-   "source": [
-    "df = pl.read_parquet(\"/content/meds/metadata/codes.parquet\")\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "PZmYRVX2W8m7"
-   },
-   "source": [
-    "# Using an example MEDS tool, ACES for labeling"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "collapsed": true,
-    "id": "z3_pG9YAWpKy",
-    "outputId": "efa4c286-413d-4a91-a53d-fb41769cd4f2"
-   },
-   "outputs": [],
-   "source": [
-    "#@title Install ACES\n",
-    "\n",
-    "\n",
-    "!pip install es-aces"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "H6fqe217XDhi"
-   },
-   "outputs": [],
-   "source": [
-    "# From the ACES documentation\n",
-    "\n",
-    "task_config = \"\"\"\n",
-    "description: >-\n",
-    "  This file specifies the base configuration for the prediction of a hospital los being greater than 3days,\n",
-    "  leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window\n",
-    "  and the target window. Patients who die or are discharged in the gap window are excluded. Note that this\n",
-    "  task is in-**hospital** los, not in-**ICU** los which is a different task.\n",
-    "\n",
-    "predicates:\n",
-    "  hospital_admission:\n",
-    "    code: {regex: \"HOSPITAL_ADMISSION//.*\"}\n",
-    "  hospital_discharge:\n",
-    "    code: {regex: \"HOSPITAL_DISCHARGE//.*\"}\n",
-    "  death:\n",
-    "    code: MEDS_DEATH\n",
-    "  discharge_or_death:\n",
-    "    expr: or(hospital_discharge, death)\n",
-    "\n",
-    "trigger: hospital_admission\n",
-    "\n",
-    "windows:\n",
-    "  input:\n",
-    "    start: NULL\n",
-    "    end: trigger + 48h\n",
-    "    start_inclusive: True\n",
-    "    end_inclusive: True\n",
-    "    index_timestamp: end\n",
-    "  gap:\n",
-    "    start: input.end\n",
-    "    end: start + 24h\n",
-    "    start_inclusive: False\n",
-    "    end_inclusive: True\n",
-    "    has:\n",
-    "      hospital_admission: (None, 0)\n",
-    "      discharge_or_death: (None, 0)\n",
-    "  target:\n",
-    "    start: trigger\n",
-    "    end: start + 3d\n",
-    "    start_inclusive: False\n",
-    "    end_inclusive: True\n",
-    "    label: discharge_or_death\n",
-    "\"\"\"\n",
-    "!mkdir /content/tasks/ -p\n",
-    "TASK_NAME = \"in_hospital_3d_los_after_48h\"\n",
-    "TASK_CONFIG_FP = f\"/content/tasks/{TASK_NAME}.yaml\"\n",
-    "with open(TASK_CONFIG_FP, 'w') as f:\n",
-    "    f.write(task_config)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "es-39eHOyp5a",
-    "outputId": "9d5e2468-fdd5-4c4b-8615-fe24f5a9310f"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install es-aces"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "bXLiJGEry-Gb",
-    "outputId": "7d954ab4-cf5c-4d02-a99c-669b5822bf44"
-   },
-   "outputs": [],
-   "source": [
-    "!aces-cli --multirun data=sharded data.standard=meds data.root=\"$MIMICIV_MEDS_DIR/data\" \"data.shard=$(expand_shards  /content/meds/data/)\" cohort_dir=\" /content/tasks\" cohort_name=\"$TASK_NAME\" config_path=\"$TASK_CONFIG_FP\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 497
-    },
-    "id": "7Vvac7DIWyRT",
-    "outputId": "40493f0e-48ba-4f5e-9d9a-401e26f1a9b7"
-   },
-   "outputs": [],
-   "source": [
-    "import polars as pl\n",
-    "\n",
-    "# execute query and get results\n",
-    "df = pl.read_parquet(f\"/content/tasks/{TASK_NAME}/**/*.parquet\")\n",
-    "\n",
-    "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n",
-    "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n",
-    "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n",
-    "\n",
-    "\n",
-    "df.sort('boolean_value')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 1000
-    },
-    "id": "XWB7O1UGhRIo",
-    "outputId": "e3416d5e-7427-4cf4-c0ab-20053a9d3430"
-   },
-   "outputs": [],
-   "source": [
-    "#@title Install meds-tab\n",
-    "\n",
-    "!pip uninstall es-aces -y\n",
-    "!pip install meds-tab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "SeGawIqli0nn"
-   },
-   "outputs": [],
-   "source": [
-    "MIMICIV_MEDS_DIR = \"/content/meds/\"\n",
-    "OUTPUT_TABULARIZATION_DIR=\"/content/tabularized/\"\n",
-    "TASK_DIR=\"/content/tasks/\"\n",
-    "TASK_NAME=\"in_hospital_3d_los_after_48h\"\n",
-    "OUTPUT_MODEL_DIR=\"/content/output/meds_tab/\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "Tud0_0cgjljP",
-    "outputId": "fb3417e0-3ba4-4f9a-ab95-ce3ba8731ca1"
-   },
-   "outputs": [],
-   "source": [
-    "!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "RVLBdOn1mnV5"
-   },
-   "outputs": [],
-   "source": [
-    "# Define the window sizes and aggregations to generate features for\n",
-    "WINDOW_SIZES = \"tabularization.window_sizes=[1d,30d,365d]\"\n",
-    "AGGREGATIONS = \"tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "KhCPqBmduNYK"
-   },
-   "outputs": [],
-   "source": [
-    "!rm -rf /content/tabularized/tabularize/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "p_D07KzxjVUl",
-    "outputId": "8836b076-cf64-4f29-da81-ac5125ab7608"
-   },
-   "outputs": [],
-   "source": [
-    "!meds-tab-tabularize-static \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "u-e-mV2Hk-Qf",
-    "outputId": "c292be12-ff74-44e4-f039-758e10ccc909"
-   },
-   "outputs": [],
-   "source": [
-    "!meds-tab-tabularize-time-series --multirun \"worker=range(0,2)\" \"hydra/launcher=joblib\" \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "NmaR_-Fik4eH"
-   },
-   "outputs": [],
-   "source": [
-    "!meds-tab-cache-task \"input_dir={MIMICIV_MEDS_DIR}/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"input_label_dir=$TASK_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "dLIkOzTblBB2"
-   },
-   "outputs": [],
-   "source": [
-    "!meds-tab-xgboost --multirun \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False \"hydra.sweeper.n_trials=10\" $WINDOW_SIZES $AGGREGATIONS \"tabularization.min_code_inclusion_count=10\""
-   ]
-  }
- ],
- "metadata": {
-  "colab": {
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/src/MEDS_DEV/demo/meds_tab.py b/src/MEDS_DEV/demo/meds_tab.py
deleted file mode 100644
index 4fae3ae..0000000
--- a/src/MEDS_DEV/demo/meds_tab.py
+++ /dev/null
@@ -1,240 +0,0 @@
-# ---
-# jupyter:
-#   jupytext:
-#     text_representation:
-#       extension: .py
-#       format_name: percent
-#       format_version: '1.3'
-#       jupytext_version: 1.16.4
-#   kernelspec:
-#     display_name: Python 3
-#     name: python3
-# ---
-
-# %% [Colab-only] Switch Colab to python 3.12
-# !sudo apt-get install python3.12 python3.12-venv
-# import sys
-# !python3.12 -m venv meds_env
-# import os
-# os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH']
-# !pip install --upgrade pip
-
-# # Then in a new code cell:
-# import sys
-# sys.executable = '/content/meds_env/bin/python'
-
-# # Confirm python version is 3.12
-# !python --version
-
-# %%
-!pwd # Should be .../src/MEDS_DEV/demo
-
-# %% [markdown]
-# ## Install dependencies
-
-# %%
-!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7
-
-# TODO install meds-evaluation
-
-# %% [markdown]
-# # Download MIMIC-IV demo
-
-# %%
-# macOS users should install wget (e.g. through brew)
-!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P ./content/download https://physionet.org/files/mimic-iv-demo/2.2/
-
-# %%
-# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config
-!mkdir -p ./content/meds-transform/
-!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git ./content/tmp/
-!mv ./content/tmp/MIMIC-IV_Example ./content/MIMIC-IV_Example
-
-# %%
-# Download MIMIC-IV metadata
-MIMICIV_RAW_DIR = "https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map"
-MIMICIV_PRE_MEDS_DIR = "./content/pre_meds/"
-!mkdir {MIMICIV_PRE_MEDS_DIR}
-
-OUTPUT_DIR = "./content/download/mimic-iv-demo/2.2"
-
-files = [
-    'd_labitems_to_loinc.csv',
-    'inputevents_to_rxnorm.csv',
-    'lab_itemid_to_loinc.csv',
-    'meas_chartevents_main.csv',
-    'meas_chartevents_value.csv',
-    'numerics-summary.csv',
-    'outputevents_to_loinc.csv',
-    'proc_datetimeevents.csv',
-    'proc_itemid.csv',
-    'waveforms-summary.csv'
-]
-
-for file in files:
-    !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file}
-    !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file}
-
-# %%
-# Convert to MEDS
-CURRENT_DIR = !pwd
-CURRENT_DIR = CURRENT_DIR[0]
-# %%
-TUTORIAL_DIR = CURRENT_DIR + "/content/MIMIC-IV_Example"
-MIMICIV_RAW_DIR = CURRENT_DIR + "/content/download/mimic-iv-demo/2.2"
-MIMICIV_PRE_MEDS_DIR = CURRENT_DIR + "/content/pre_meds"
-MIMICIV_MEDS_DIR = CURRENT_DIR + "/content/meds"
-
-EVENT_CONVERSION_CONFIG_FP = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/event_config.yaml"
-PIPELINE_CONFIG_PATH = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/pipeline_config.yaml"
-!cd {TUTORIAL_DIR} && ./run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true
-
-# %% [markdown]
-# # Examine MEDS data
-
-# %%
-import polars as pl
-
-data = pl.read_parquet('./content/meds/data/**/*.parquet')
-
-data[['subject_id', 'time', 'code', 'numeric_value']]
-
-# %% [markdown]
-# # A simple Polars analysis
-
-# %%
-icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//'))
-icd10_events.group_by('code').count().sort('count', descending=True)
-
-# %%
-df = pl.read_parquet("./content/meds/metadata/codes.parquet")
-df
-
-# %% [markdown]
-# ## Using an example MEDS tool, ACES for labeling
-
-# %% [markdown]
-# ## Install ACES
-
-# %%
-!pip install es-aces
-
-# %%
-
-# From ACES documentation
-task_config = """
-description: >-
-  This file specifies the base configuration for the prediction of a hospital los being greater than 3days,
-  leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window
-  and the target window. Patients who die or are discharged in the gap window are excluded. Note that this
-  task is in-**hospital** los, not in-**ICU** los which is a different task.
-
-predicates:
-  hospital_admission:
-    code: {regex: "HOSPITAL_ADMISSION//.*"}
-  hospital_discharge:
-    code: {regex: "HOSPITAL_DISCHARGE//.*"}
-  death:
-    code: MEDS_DEATH
-  discharge_or_death:
-    expr: or(hospital_discharge, death)
-
-trigger: hospital_admission
-
-windows:
-  input:
-    start: NULL
-    end: trigger + 48h
-    start_inclusive: True
-    end_inclusive: True
-    index_timestamp: end
-  gap:
-    start: input.end
-    end: start + 24h
-    start_inclusive: False
-    end_inclusive: True
-    has:
-      hospital_admission: (None, 0)
-      discharge_or_death: (None, 0)
-  target:
-    start: trigger
-    end: start + 3d
-    start_inclusive: False
-    end_inclusive: True
-    label: discharge_or_death
-"""
-
-!mkdir ./content/tasks/ -p
-TASK_NAME = "in_hospital_3d_los_after_48h"
-TASK_CONFIG_FP = f"./content/tasks/{TASK_NAME}.yaml"
-with open(TASK_CONFIG_FP, 'w') as f:
-    f.write(task_config)
-
-# %%
-!pip install es-aces
-
-# %%
-!aces-cli --multirun data=sharded data.standard=meds data.root="$MIMICIV_MEDS_DIR/data" "data.shard=$(expand_shards  ./content/meds/data/)" cohort_dir=" ./content/tasks" cohort_name="$TASK_NAME" config_path="$TASK_CONFIG_FP"
-
-# %%
-# TODO: reimporting polars due to dependencies?
-import polars as pl
-
-# Execute query and get results
-df = pl.read_parquet(f"./content/tasks/{TASK_NAME}/**/*.parquet")
-
-print("train prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/train/*.parquet")['boolean_value'].mean(), 3)))
-print("tuning prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/tuning/*.parquet")['boolean_value'].mean(), 3)))
-print("held_out prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/held_out/*.parquet")['boolean_value'].mean(), 3)))
-
-
-df.sort('boolean_value')
-
-# %%
-# ## Install meds-tab
-
-!pip uninstall es-aces -y # TODO ???
-!pip install meds-tab
-
-# %%
-MIMICIV_MEDS_DIR = "./content/meds/"
-OUTPUT_TABULARIZATION_DIR="./content/tabularized/"
-TASK_DIR="./content/tasks/"
-TASK_NAME="in_hospital_3d_los_after_48h"
-OUTPUT_MODEL_DIR="./content/output/meds_tab/"
-
-# %%
-!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR}
-
-# %%
-# Define the window sizes and aggregations to generate features for
-# TODO define this as system variables or make sure the shell
-# commands can find these
-WINDOW_SIZES = "tabularization.window_sizes=[1d,30d,365d]"
-AGGREGATIONS = "tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]"
-
-# %%
-!rm -rf ./content/tabularized/tabularize/
-
-# %%
-# TODO shell vs python variables
-!echo {OUTPUT_TABULARIZATION_DIR}
-
-# %%
-# TODO shell vs python variables
-!echo WINDOW_SIZES
-# %%
-# TODO shell vs python variables
-!meds-tab-tabularize-static "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS
-
-# %%
-# TODO shell vs python variables
-!meds-tab-tabularize-time-series --multirun "worker=range(0,2)" "hydra/launcher=joblib" "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS
-
-# %%
-# TODO shell vs python variables
-!meds-tab-cache-task "input_dir={MIMICIV_MEDS_DIR}/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" "input_label_dir=$TASK_DIR/$TASK_NAME/" "task_name=$TASK_NAME" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS
-
-# %%
-# TODO shell vs python variables
-!meds-tab-xgboost --multirun "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" "output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/" "task_name=$TASK_NAME" do_overwrite=False "hydra.sweeper.n_trials=10" $WINDOW_SIZES $AGGREGATIONS "tabularization.min_code_inclusion_count=10"