From 5a98d43e114c32983dadf3cd7394468dd3a1075d Mon Sep 17 00:00:00 2001 From: Nassim Oufattole Date: Sat, 14 Dec 2024 17:34:27 -0800 Subject: [PATCH] Ran e2e the extraction and aces demos, works as expected --- demo/aces.ipynb | 520 +-- demo/extract_meds_data.ipynb | 6516 +++++++++++++++++++++++++--------- 2 files changed, 5097 insertions(+), 1939 deletions(-) diff --git a/demo/aces.ipynb b/demo/aces.ipynb index 7b31586..db13aab 100644 --- a/demo/aces.ipynb +++ b/demo/aces.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -20,7 +20,36 @@ "id": "z3_pG9YAWpKy", "outputId": "efa4c286-413d-4a91-a53d-fb41769cd4f2" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: es-aces in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (0.6.1)\n", + "Requirement already satisfied: polars<=1.17.1,>=1.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (1.17.1)\n", + "Requirement already satisfied: bigtree==0.18.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.18.3)\n", + "Requirement already satisfied: ruamel.yaml==0.18.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.18.6)\n", + "Requirement already satisfied: loguru==0.7.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.7.3)\n", + "Requirement already satisfied: hydra-core==1.3.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (1.3.2)\n", + "Requirement already satisfied: pytimeparse==1.1.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (1.1.8)\n", + "Requirement already satisfied: networkx==3.3.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (3.3)\n", + "Requirement already satisfied: pyarrow==17.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (17.0.0)\n", + "Requirement already satisfied: meds==0.3.3 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.3.3)\n", + "Requirement already satisfied: omegaconf<2.4,>=2.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (2.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (4.9.3)\n", + "Requirement already satisfied: packaging in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (24.2)\n", + "Requirement already satisfied: jsonschema>=4.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->es-aces) (4.23.0)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->es-aces) (4.12.2)\n", + "Requirement already satisfied: numpy>=1.16.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pyarrow==17.*->es-aces) (2.2.0)\n", + "Requirement already satisfied: ruamel.yaml.clib>=0.2.7 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from ruamel.yaml==0.18.*->es-aces) (0.2.12)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (0.22.3)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from omegaconf<2.4,>=2.2->hydra-core==1.3.*->es-aces) (6.0.2)\n" + ] + } + ], "source": [ "#@title Install ACES\n", "\n", @@ -30,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -58,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": null, "metadata": { "id": "H6fqe217XDhi" }, @@ -67,8 +96,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "TASK_DIR\n", - "mkdir: -p: File exists\n" + "TASK_DIR\n" ] } ], @@ -118,17 +146,16 @@ "\"\"\"\n", "MEDS_DIR = ROOT_DIR + \"/meds\"\n", "TASK_DIR = MEDS_DIR + \"/task_labels\"\n", - "! echo TASK_DIR\n", "TASK_NAME = \"los_in_hospital_first_48h\"\n", "TASK_CONFIG_FP = f\"{TASK_DIR}/{TASK_NAME}.yaml\"\n", - "!mkdir {TASK_DIR}/{TASK_NAME} -p\n", + "!mkdir -p {TASK_DIR}/{TASK_NAME}\n", "with open(TASK_CONFIG_FP, 'w') as f:\n", " f.write(task_config)" ] }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -141,13 +168,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2024-12-14 17:02:13,334][HYDRA] Launching 3 jobs locally\n", - "[2024-12-14 17:02:13,334][HYDRA] \t#0 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=held_out/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", - "\u001b[32m2024-12-14 17:02:13.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "[2024-12-14 17:33:45,826][HYDRA] Launching 3 jobs locally\n", + "[2024-12-14 17:33:45,826][HYDRA] \t#0 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=held_out/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 17:33:46.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", @@ -155,37 +182,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:46.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┣━━ input.start\n", "┃ ┗━━ gap.end\n", "┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4,155 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 16 valid rows returned corresponding to 10 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.645\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:46.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12,159 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 8 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 39 valid rows returned corresponding to 23 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.176\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -193,13 +220,13 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.233536. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/held_out/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:02:13,777][HYDRA] \t#1 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=train/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", - "\u001b[32m2024-12-14 17:02:13.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 17:33:46.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.279571. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/held_out/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:33:46,323][HYDRA] \t#1 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=train/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 17:33:46.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", @@ -207,37 +234,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:46.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┣━━ input.start\n", "┃ ┗━━ gap.end\n", "┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 72,649 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 26 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 212 valid rows returned corresponding to 75 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.240\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 47,886 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 18 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 145 valid rows returned corresponding to 46 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.597\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -245,13 +272,13 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.399837. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/train/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:02:14,243][HYDRA] \t#2 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=tuning/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", - "\u001b[32m2024-12-14 17:02:14.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 17:33:46.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.208261. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/train/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:33:46,600][HYDRA] \t#2 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=tuning/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 17:33:46.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", @@ -259,37 +286,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:46.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┣━━ input.start\n", "┃ ┗━━ gap.end\n", "┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6,240 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 15 valid rows returned corresponding to 9 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 22,999 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 59 valid rows returned corresponding to 25 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.793\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -297,7 +324,7 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.054431. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/tuning/0.parquet'.\u001b[0m\n" + "\u001b[32m2024-12-14 17:33:46.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.125503. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/tuning/0.parquet'.\u001b[0m\n" ] } ], @@ -307,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -321,9 +348,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "train prevalence: 0.231\n", - "tuning prevalence: 0.133\n", - "held_out prevalence: 0.25\n" + "train prevalence: 0.241\n", + "tuning prevalence: 0.169\n", + "held_out prevalence: 0.256\n" ] }, { @@ -336,7 +363,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (243, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100128532175-04-07 15:36:00falsenullnullnull
100128532176-11-27 21:28:00falsenullnullnull
100147292125-03-01 07:15:00falsenullnullnull
100147292125-03-21 16:58:00falsenullnullnull
100167422178-07-05 21:13:00falsenullnullnull
100399972135-11-09 02:42:00truenullnullnull
100400252143-03-20 12:34:00truenullnullnull
100400252145-07-05 23:46:00truenullnullnull
100207402150-09-17 14:09:00truenullnullnull
100207402151-01-17 15:25:00truenullnullnull
" + "shape: (243, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100044572141-12-19 11:00:00falsenullnullnull
100090492174-05-28 08:21:00falsenullnullnull
100128532175-04-07 15:36:00falsenullnullnull
100128532176-11-27 21:28:00falsenullnullnull
100147292125-03-01 07:15:00falsenullnullnull
100264062129-01-05 15:55:00truenullnullnull
100356312113-07-19 17:15:00truenullnullnull
100397082140-09-27 04:17:00truenullnullnull
100397082142-05-17 17:14:00truenullnullnull
100397082142-07-08 09:08:00truenullnullnull
" ], "text/plain": [ "shape: (243, 6)\n", @@ -345,31 +372,31 @@ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ datetime[μs] ┆ bool ┆ i64 ┆ f64 ┆ str │\n", "╞════════════╪═══════════════════╪═══════════════╪═══════════════╪═════════════╪═══════════════════╡\n", + "│ 10004457 ┆ 2141-12-19 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 11:00:00 ┆ ┆ ┆ ┆ │\n", + "│ 10009049 ┆ 2174-05-28 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 08:21:00 ┆ ┆ ┆ ┆ │\n", "│ 10012853 ┆ 2175-04-07 ┆ false ┆ null ┆ null ┆ null │\n", "│ ┆ 15:36:00 ┆ ┆ ┆ ┆ │\n", "│ 10012853 ┆ 2176-11-27 ┆ false ┆ null ┆ null ┆ null │\n", "│ ┆ 21:28:00 ┆ ┆ ┆ ┆ │\n", "│ 10014729 ┆ 2125-03-01 ┆ false ┆ null ┆ null ┆ null │\n", "│ ┆ 07:15:00 ┆ ┆ ┆ ┆ │\n", - "│ 10014729 ┆ 2125-03-21 ┆ false ┆ null ┆ null ┆ null │\n", - "│ ┆ 16:58:00 ┆ ┆ ┆ ┆ │\n", - "│ 10016742 ┆ 2178-07-05 ┆ false ┆ null ┆ null ┆ null │\n", - "│ ┆ 21:13:00 ┆ ┆ ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 10039997 ┆ 2135-11-09 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 02:42:00 ┆ ┆ ┆ ┆ │\n", - "│ 10040025 ┆ 2143-03-20 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 12:34:00 ┆ ┆ ┆ ┆ │\n", - "│ 10040025 ┆ 2145-07-05 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 23:46:00 ┆ ┆ ┆ ┆ │\n", - "│ 10020740 ┆ 2150-09-17 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 14:09:00 ┆ ┆ ┆ ┆ │\n", - "│ 10020740 ┆ 2151-01-17 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 15:25:00 ┆ ┆ ┆ ┆ │\n", + "│ 10026406 ┆ 2129-01-05 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 15:55:00 ┆ ┆ ┆ ┆ │\n", + "│ 10035631 ┆ 2113-07-19 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 17:15:00 ┆ ┆ ┆ ┆ │\n", + "│ 10039708 ┆ 2140-09-27 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 04:17:00 ┆ ┆ ┆ ┆ │\n", + "│ 10039708 ┆ 2142-05-17 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 17:14:00 ┆ ┆ ┆ ┆ │\n", + "│ 10039708 ┆ 2142-07-08 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 09:08:00 ┆ ┆ ┆ ┆ │\n", "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘" ] }, - "execution_count": 62, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -397,22 +424,42 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 6, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sim/miniconda3/envs/dev/lib/python3.12/pty.py:95: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid, fd = os.forkpty()\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "Running task mortality/in_icu/first_24h on dataset MIMIC-IV with MEDS_ROOT_DIR=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds and SHARDS=held_out/0,train/0,tuning/0\n", - "[2024-12-14 17:02:21,042][HYDRA] Launching 3 jobs locally\n", - "[2024-12-14 17:02:21,042][HYDRA] \t#0 : data.shard=held_out/0\n", - "\u001b[32m2024-12-14 17:02:21.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "[2024-12-14 17:33:48,126][HYDRA] Launching 3 jobs locally\n", + "[2024-12-14 17:33:48,126][HYDRA] \t#0 : data.shard=held_out/0\n", + "\u001b[32m2024-12-14 17:33:48.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: ${oc.env:MEDS_ROOT_DIR}/data\n", @@ -420,38 +467,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:48.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┗━━ input.start\n", "┗━━ gap.end\n", " ┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4,163 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 6 valid rows returned corresponding to 4 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m142\u001b[0m - \u001b[33m\u001b[1mAll labels in the extracted cohort are the same: '0'. This may indicate an issue with the task logic. Please double-check your configuration file if this is not expected.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.306\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:48.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12,173 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.351\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 17 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 16 valid rows returned corresponding to 11 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.433\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -459,14 +505,14 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.209584. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/held_out/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:02:21,399][HYDRA] \t#1 : data.shard=train/0\n", - "\u001b[32m2024-12-14 17:02:21.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 17:33:48.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.231224. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/held_out/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:33:48,517][HYDRA] \t#1 : data.shard=train/0\n", + "\u001b[32m2024-12-14 17:33:48.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: ${oc.env:MEDS_ROOT_DIR}/data\n", @@ -474,37 +520,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:48.609\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┗━━ input.start\n", "┗━━ gap.end\n", " ┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 72,774 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.569\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 53 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 60 valid rows returned corresponding to 47 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.806\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 47,976 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 37 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 36 valid rows returned corresponding to 27 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.854\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -512,14 +558,14 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.340355. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/train/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:02:21,810][HYDRA] \t#2 : data.shard=tuning/0\n", - "\u001b[32m2024-12-14 17:02:21.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 17:33:48.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.267562. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/train/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:33:48,859][HYDRA] \t#2 : data.shard=tuning/0\n", + "\u001b[32m2024-12-14 17:33:48.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: ${oc.env:MEDS_ROOT_DIR}/data\n", @@ -527,37 +573,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:48.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┗━━ input.start\n", "┗━━ gap.end\n", " ┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6,242 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 7 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 8 valid rows returned corresponding to 5 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.946\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 23,030 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 22 valid rows returned corresponding to 18 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.085\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -565,7 +611,7 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.071263. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/tuning/0.parquet'.\u001b[0m\n" + "\u001b[32m2024-12-14 17:33:49.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.157569. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/tuning/0.parquet'.\u001b[0m\n" ] } ], @@ -576,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -593,16 +639,16 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "train prevalence: 0.133\n", - "tuning prevalence: 0.125\n", - "held_out prevalence: 0.0\n" + "train prevalence: 0.167\n", + "tuning prevalence: 0.091\n", + "held_out prevalence: 0.062\n" ] }, { @@ -615,7 +661,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (74, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100128532176-11-27 02:34:49falsenullnullnull
100147292125-02-28 10:03:08falsenullnullnull
100167422178-07-04 22:45:00falsenullnullnull
100167422178-07-14 08:16:00falsenullnullnull
100167422178-07-23 08:19:00falsenullnullnull
100104712155-12-03 20:33:00truenullnullnull
100159312177-03-25 21:48:07truenullnullnull
100378612117-03-15 16:34:58truenullnullnull
100379752185-01-18 19:12:12truenullnullnull
100380812115-10-10 10:15:25truenullnullnull
" + "shape: (74, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100128532176-11-27 02:34:49falsenullnullnull
100147292125-02-28 10:03:08falsenullnullnull
100167422178-07-04 22:45:00falsenullnullnull
100167422178-07-14 08:16:00falsenullnullnull
100167422178-07-23 08:19:00falsenullnullnull
100104712155-12-03 20:33:00truenullnullnull
100159312177-03-25 21:48:07truenullnullnull
100378612117-03-15 16:34:58truenullnullnull
100047202186-11-13 19:55:00truenullnullnull
100379752185-01-18 19:12:12truenullnullnull
" ], "text/plain": [ "shape: (74, 6)\n", @@ -641,14 +687,14 @@ "│ ┆ 21:48:07 ┆ ┆ ┆ ┆ │\n", "│ 10037861 ┆ 2117-03-15 ┆ true ┆ null ┆ null ┆ null │\n", "│ ┆ 16:34:58 ┆ ┆ ┆ ┆ │\n", + "│ 10004720 ┆ 2186-11-13 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 19:55:00 ┆ ┆ ┆ ┆ │\n", "│ 10037975 ┆ 2185-01-18 ┆ true ┆ null ┆ null ┆ null │\n", "│ ┆ 19:12:12 ┆ ┆ ┆ ┆ │\n", - "│ 10038081 ┆ 2115-10-10 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 10:15:25 ┆ ┆ ┆ ┆ │\n", "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘" ] }, - "execution_count": 67, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } diff --git a/demo/extract_meds_data.ipynb b/demo/extract_meds_data.ipynb index f6c55b6..ac77546 100644 --- a/demo/extract_meds_data.ipynb +++ b/demo/extract_meds_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 14, + "execution_count": 36, "metadata": { "id": "ikPVQZOnPcI0" }, @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 37, "metadata": { "collapsed": true, "id": "rjqK4CuRPfnE" @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 39, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -69,8 +69,8 @@ "remote: Enumerating objects: 144, done.\u001b[K\n", "remote: Counting objects: 100% (144/144), done.\u001b[K\n", "remote: Compressing objects: 100% (129/129), done.\u001b[K\n", - "remote: Total 144 (delta 22), reused 69 (delta 7), pack-reused 0 (from 0)\u001b[K\n", - "Receiving objects: 100% (144/144), 211.41 KiB | 979.00 KiB/s, done.\n", + "remote: Total 144 (delta 22), reused 70 (delta 7), pack-reused 0 (from 0)\u001b[K\n", + "Receiving objects: 100% (144/144), 211.41 KiB | 571.00 KiB/s, done.\n", "Resolving deltas: 100% (22/22), done.\n", "usage: cp [-R [-H | -L | -P]] [-fi | -n] [-aclpSsvXx] source_file target_file\n", " cp [-R [-H | -L | -P]] [-fi | -n] [-aclpSsvXx] source_file ... target_directory\n" @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 40, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -100,225 +100,225 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-12-14 16:06:48-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:14-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 361048 (353K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 352.59K 664KB/s in 0.5s \n", + "/Users/sim/Document 100%[===================>] 352.59K 1.04MB/s in 0.3s \n", "\n", - "2024-12-14 16:06:49 (664 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’ saved [361048/361048]\n", + "2024-12-14 17:15:14 (1.04 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’ saved [361048/361048]\n", "\n", - "--2024-12-14 16:06:49-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:15-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 361048 (353K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 352.59K 942KB/s in 0.4s \n", + "/Users/sim/Document 100%[===================>] 352.59K 561KB/s in 0.6s \n", "\n", - "2024-12-14 16:06:49 (942 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’ saved [361048/361048]\n", + "2024-12-14 17:15:15 (561 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’ saved [361048/361048]\n", "\n", - "--2024-12-14 16:06:50-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:16-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 79195 (77K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 77.34K --.-KB/s in 0.009s \n", + "/Users/sim/Document 100%[===================>] 77.34K 109KB/s in 0.7s \n", "\n", - "2024-12-14 16:06:50 (8.44 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’ saved [79195/79195]\n", + "2024-12-14 17:15:17 (109 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’ saved [79195/79195]\n", "\n", - "--2024-12-14 16:06:51-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:17-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 79195 (77K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 77.34K --.-KB/s in 0.008s \n", + "/Users/sim/Document 100%[===================>] 77.34K --.-KB/s in 0.009s \n", "\n", - "2024-12-14 16:06:51 (9.16 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’ saved [79195/79195]\n", + "2024-12-14 17:15:17 (8.59 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’ saved [79195/79195]\n", "\n", - "--2024-12-14 16:06:51-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:18-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 79970 (78K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.01s \n", + "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.08s \n", "\n", - "2024-12-14 16:06:51 (6.67 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’ saved [79970/79970]\n", + "2024-12-14 17:15:18 (986 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’ saved [79970/79970]\n", "\n", - "--2024-12-14 16:06:52-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:18-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 79970 (78K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.007s \n", + "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.01s \n", "\n", - "2024-12-14 16:06:52 (11.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’ saved [79970/79970]\n", + "2024-12-14 17:15:18 (5.33 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’ saved [79970/79970]\n", "\n", - "--2024-12-14 16:06:52-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:19-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 34862 (34K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’\n", "\n", "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.003s \n", "\n", - "2024-12-14 16:06:52 (10.1 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’ saved [34862/34862]\n", + "2024-12-14 17:15:19 (9.53 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’ saved [34862/34862]\n", "\n", - "--2024-12-14 16:06:53-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:19-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 34862 (34K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.002s \n", + "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.004s \n", "\n", - "2024-12-14 16:06:53 (17.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’ saved [34862/34862]\n", + "2024-12-14 17:15:19 (7.46 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’ saved [34862/34862]\n", "\n", - "--2024-12-14 16:06:53-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:20-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 5902 (5.8K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 16:06:54 (8.35 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’ saved [5902/5902]\n", + "2024-12-14 17:15:20 (3.64 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’ saved [5902/5902]\n", "\n", - "--2024-12-14 16:06:54-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:20-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 5902 (5.8K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0s \n", + "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0.001s \n", "\n", - "2024-12-14 16:06:54 (49.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’ saved [5902/5902]\n", + "2024-12-14 17:15:20 (3.79 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’ saved [5902/5902]\n", "\n", - "--2024-12-14 16:06:54-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:21-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 32353 (32K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 16:06:55 (27.5 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’ saved [32353/32353]\n", + "2024-12-14 17:15:21 (12.5 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’ saved [32353/32353]\n", "\n", - "--2024-12-14 16:06:55-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:21-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 32353 (32K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.004s \n", "\n", - "2024-12-14 16:06:55 (61.1 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’ saved [32353/32353]\n", + "2024-12-14 17:15:21 (7.26 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’ saved [32353/32353]\n", "\n", - "--2024-12-14 16:06:56-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:22-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 34008 (33K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0.006s \n", "\n", - "2024-12-14 16:06:56 (22.2 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’ saved [34008/34008]\n", + "2024-12-14 17:15:22 (5.46 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’ saved [34008/34008]\n", "\n", - "--2024-12-14 16:06:56-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:22-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 34008 (33K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0s \n", + "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0.004s \n", "\n", - "2024-12-14 16:06:56 (133 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’ saved [34008/34008]\n", + "2024-12-14 17:15:23 (9.19 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’ saved [34008/34008]\n", "\n", - "--2024-12-14 16:06:57-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:23-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 25205 (25K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 16:06:57 (28.1 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’ saved [25205/25205]\n", + "2024-12-14 17:15:23 (10.2 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’ saved [25205/25205]\n", "\n", - "--2024-12-14 16:06:57-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:24-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 25205 (25K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.006s \n", + "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.004s \n", "\n", - "2024-12-14 16:06:57 (3.71 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’ saved [25205/25205]\n", + "2024-12-14 17:15:24 (6.67 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’ saved [25205/25205]\n", "\n", - "--2024-12-14 16:06:58-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:24-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 21414 (21K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.004s \n", + "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.003s \n", "\n", - "2024-12-14 16:06:58 (4.98 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’ saved [21414/21414]\n", + "2024-12-14 17:15:24 (8.16 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’ saved [21414/21414]\n", "\n", - "--2024-12-14 16:06:59-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:24-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 21414 (21K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’\n", "\n", "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 16:06:59 (10.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’ saved [21414/21414]\n", + "2024-12-14 17:15:25 (8.29 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’ saved [21414/21414]\n", "\n", - "--2024-12-14 16:06:59-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:25-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 5743 (5.6K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0s \n", + "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0.001s \n", "\n", - "2024-12-14 16:06:59 (30.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’ saved [5743/5743]\n", + "2024-12-14 17:15:25 (4.44 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’ saved [5743/5743]\n", "\n", - "--2024-12-14 16:07:00-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:25-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 5743 (5.6K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’\n", "\n", "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0s \n", "\n", - "2024-12-14 16:07:00 (37.3 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’ saved [5743/5743]\n", + "2024-12-14 17:15:25 (53.7 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’ saved [5743/5743]\n", "\n" ] } @@ -351,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -360,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 42, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -378,95 +378,95 @@ "Setting DO_UNZIP=true\n", "Unzipping csv.gz files matching /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/*/*.csv.gz.\n", "Running pre-MEDS conversion.\n", - "\u001b[32m2024-12-14 16:07:02.391\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.392\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/provider: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/pharmacy: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping hosp/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/index.html as no compatible dataframe file was found.\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.394\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/microbiologyevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.395\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/labevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.395\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/admissions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_labitems: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/prescriptions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.397\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/procedures_icd: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_hcpcs: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.399\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/omr: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.399\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/transfers: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/services: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.401\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/hcpcsevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.401\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.403\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping icu/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/index.html as no compatible dataframe file was found.\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.404\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/d_items: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.404\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/procedureevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.405\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/inputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.406\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/datetimeevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.406\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/ingredientevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.407\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/chartevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/caregiver: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/outputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/icustays: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m300\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv for manipulating other dataframes...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1m Loaded in 0:00:00.001291\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/diagnoses_icd...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000166\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet in 0:00:00.068729\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/drgcodes...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000264\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet in 0:00:00.004873\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/patients...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000171\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet in 0:00:00.007117\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.492\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_diagnoses...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet in 0:00:00.087729\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.580\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_procedures...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet in 0:00:00.052624\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mDone! All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.980\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.981\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/provider: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/pharmacy: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping hosp/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/index.html as no compatible dataframe file was found.\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.983\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/microbiologyevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/labevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/admissions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.985\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_labitems: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.985\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/prescriptions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.986\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/procedures_icd: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_hcpcs: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.988\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/omr: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/transfers: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/services: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.990\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/hcpcsevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.992\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.992\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping icu/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/index.html as no compatible dataframe file was found.\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.993\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/d_items: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.994\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/procedureevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/inputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/datetimeevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.996\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/ingredientevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.996\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/chartevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.997\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/caregiver: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/outputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/icustays: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.999\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m300\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv for manipulating other dataframes...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1m Loaded in 0:00:00.001182\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/patients...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000155\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet in 0:00:00.061956\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/drgcodes...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000196\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet in 0:00:00.004755\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/diagnoses_icd...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000183\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet in 0:00:00.005889\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.073\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_diagnoses...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet in 0:00:00.087207\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.161\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_procedures...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet in 0:00:00.053073\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mDone! All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds\u001b[0m\n", "Setting N_WORKERS to 1 to avoid issues with the runners.\n", "Running extraction pipeline.\n", - "\u001b[32m2024-12-14 16:07:03.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:03.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:06.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:28.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:31.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:06.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:03.746 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", + "\u001b[32m2024-12-14 17:15:31.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:29.323 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -495,9 +495,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -513,10 +513,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: shard_events\n", @@ -550,368 +546,368 @@ "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", "reducer_output_dir: null\n", "\n", - "2024-12-14 16:07:03.751 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml to identify needed columns.\n", - "2024-12-14 16:07:03.781 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.782 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.782 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/numerics-summary.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_main.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_value.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/lab_itemid_to_loinc.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/waveforms-summary.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.787 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. Sub-sharding 16 files:\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\n", + "2024-12-14 17:15:29.328 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml to identify needed columns.\n", + "2024-12-14 17:15:29.358 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.358 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.359 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.360 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.360 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.360 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/numerics-summary.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/lab_itemid_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/waveforms-summary.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_value.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_main.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.364 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. Sub-sharding 16 files:\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\n", - "2024-12-14 16:07:03.790 | INFO | MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", - "2024-12-14 16:07:03.792 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents.\n", - "2024-12-14 16:07:03.792 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to determine row count.\n", - "2024-12-14 16:07:03.794 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.366 | INFO | MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", + "2024-12-14 17:15:29.369 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients.\n", + "2024-12-14 17:15:29.369 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to determine row count.\n", + "2024-12-14 17:15:29.371 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:29.371 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:29.372 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", + "2024-12-14 17:15:29.378 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", + "2024-12-14 17:15:29.384 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 100 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet.\n", + "2024-12-14 17:15:29.384 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.384 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet row-chunk [0-100) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet.\n", + "2024-12-14 17:15:29.387 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.387587. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.389 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", + "2024-12-14 17:15:29.389 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:29.389 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:29.389 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", + "2024-12-14 17:15:29.389 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", + "2024-12-14 17:15:29.389 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.390 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:29.398 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011062\n", + "2024-12-14 17:15:29.398 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:29.387587.json\n", + "2024-12-14 17:15:29.401 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes.\n", + "2024-12-14 17:15:29.401 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to determine row count.\n", + "2024-12-14 17:15:29.403 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:29.403 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:29.404 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", + "2024-12-14 17:15:29.404 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", + "2024-12-14 17:15:29.404 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 454 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet.\n", + "2024-12-14 17:15:29.404 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.404 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet row-chunk [0-454) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet.\n", + "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.407233. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", + "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:29.407 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:29.407 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", + "2024-12-14 17:15:29.407 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", + "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.408 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:29.410 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003108\n", + "2024-12-14 17:15:29.410 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:29.407233.json\n", + "2024-12-14 17:15:29.412 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd.\n", + "2024-12-14 17:15:29.413 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to determine row count.\n", + "2024-12-14 17:15:29.415 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:03.795 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 16:07:04.301 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", - "2024-12-14 16:07:04.337 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 668862 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv.\n", - "2024-12-14 16:07:04.337 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:04.337 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv row-chunk [0-668862) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet.\n", - "2024-12-14 16:07:04.343 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:04.343324. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:04.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\n", - "2024-12-14 16:07:04.345 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.415 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 17:15:29.419 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 17:15:29.420 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 722 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv.\n", + "2024-12-14 17:15:29.420 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.420 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv row-chunk [0-722) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet.\n", + "2024-12-14 17:15:29.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.423035. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\n", + "2024-12-14 17:15:29.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:04.345 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 16:07:05.156 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", - "2024-12-14 16:07:05.156 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.156 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:05.364 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.021167\n", - "2024-12-14 16:07:05.364 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:04.343324.json\n", - "2024-12-14 16:07:05.367 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar.\n", - "2024-12-14 16:07:05.367 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to determine row count.\n", - "2024-12-14 16:07:05.369 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 17:15:29.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 17:15:29.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.424 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:29.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004191\n", + "2024-12-14 17:15:29.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:29.423035.json\n", + "2024-12-14 17:15:29.429 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents.\n", + "2024-12-14 17:15:29.429 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to determine row count.\n", + "2024-12-14 17:15:29.431 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.370 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", - "2024-12-14 16:07:05.418 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", - "2024-12-14 16:07:05.422 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 35835 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv.\n", - "2024-12-14 16:07:05.422 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.422 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv row-chunk [0-35835) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet.\n", - "2024-12-14 16:07:05.425 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.425385. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.425 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\n", - "2024-12-14 16:07:05.425 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.432 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", + "2024-12-14 17:15:29.438 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", + "2024-12-14 17:15:29.439 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 9362 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv.\n", + "2024-12-14 17:15:29.439 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.439 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv row-chunk [0-9362) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet.\n", + "2024-12-14 17:15:29.442 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.442133. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.442 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\n", + "2024-12-14 17:15:29.442 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.425 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", - "2024-12-14 16:07:05.475 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", - "2024-12-14 16:07:05.475 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.475 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:05.514 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.089048\n", - "2024-12-14 16:07:05.514 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:05.425385.json\n", - "2024-12-14 16:07:05.517 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions.\n", - "2024-12-14 16:07:05.517 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to determine row count.\n", - "2024-12-14 16:07:05.519 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.442 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", + "2024-12-14 17:15:29.448 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", + "2024-12-14 17:15:29.449 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.449 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:29.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028809\n", + "2024-12-14 17:15:29.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:29.442133.json\n", + "2024-12-14 17:15:29.473 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays.\n", + "2024-12-14 17:15:29.474 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to determine row count.\n", + "2024-12-14 17:15:29.486 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.519 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", - "2024-12-14 16:07:05.520 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", - "2024-12-14 16:07:05.521 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 275 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv.\n", - "2024-12-14 16:07:05.521 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.521 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv row-chunk [0-275) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet.\n", - "2024-12-14 16:07:05.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.523672. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\n", - "2024-12-14 16:07:05.524 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.487 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:29.489 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", + "2024-12-14 17:15:29.490 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 140 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv.\n", + "2024-12-14 17:15:29.490 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.491 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv row-chunk [0-140) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet.\n", + "2024-12-14 17:15:29.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.498163. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\n", + "2024-12-14 17:15:29.498 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.524 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", - "2024-12-14 16:07:05.524 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", - "2024-12-14 16:07:05.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.525 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:05.528 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005136\n", - "2024-12-14 16:07:05.528 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:05.523672.json\n", - "2024-12-14 16:07:05.531 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents.\n", - "2024-12-14 16:07:05.531 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to determine row count.\n", - "2024-12-14 16:07:05.534 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.499 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:29.499 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", + "2024-12-14 17:15:29.499 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.500 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:29.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005838\n", + "2024-12-14 17:15:29.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:29.498163.json\n", + "2024-12-14 17:15:29.506 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents.\n", + "2024-12-14 17:15:29.506 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to determine row count.\n", + "2024-12-14 17:15:29.509 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.534 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", - "2024-12-14 16:07:05.544 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", - "2024-12-14 16:07:05.546 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 9362 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv.\n", - "2024-12-14 16:07:05.546 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.546 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv row-chunk [0-9362) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet.\n", - "2024-12-14 16:07:05.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.548505. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\n", - "2024-12-14 16:07:05.548 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.509 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:29.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", + "2024-12-14 17:15:29.512 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1468 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv.\n", + "2024-12-14 17:15:29.512 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.512 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv row-chunk [0-1468) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet.\n", + "2024-12-14 17:15:29.515 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.515574. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.515 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\n", + "2024-12-14 17:15:29.516 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.549 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", - "2024-12-14 16:07:05.559 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", - "2024-12-14 16:07:05.559 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:05.583 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.034846\n", - "2024-12-14 16:07:05.583 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:05.548505.json\n", - "2024-12-14 16:07:05.586 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents.\n", - "2024-12-14 16:07:05.586 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to determine row count.\n", - "2024-12-14 16:07:05.588 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.516 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:29.518 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", + "2024-12-14 17:15:29.518 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.518 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:29.527 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011919\n", + "2024-12-14 17:15:29.527 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:29.515574.json\n", + "2024-12-14 17:15:29.530 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar.\n", + "2024-12-14 17:15:29.530 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to determine row count.\n", + "2024-12-14 17:15:29.532 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.588 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.592 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", - "2024-12-14 16:07:05.593 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1468 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv.\n", - "2024-12-14 16:07:05.593 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.593 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv row-chunk [0-1468) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet.\n", - "2024-12-14 16:07:05.595 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.595809. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.596 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\n", - "2024-12-14 16:07:05.596 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.532 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", + "2024-12-14 17:15:29.562 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", + "2024-12-14 17:15:29.565 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 35835 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv.\n", + "2024-12-14 17:15:29.566 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.566 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv row-chunk [0-35835) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet.\n", + "2024-12-14 17:15:29.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.568485. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\n", + "2024-12-14 17:15:29.568 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.596 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.600 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", - "2024-12-14 16:07:05.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:05.607 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012174\n", - "2024-12-14 16:07:05.608 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:05.595809.json\n", - "2024-12-14 16:07:05.610 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients.\n", - "2024-12-14 16:07:05.610 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to determine row count.\n", - "2024-12-14 16:07:05.612 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.612 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.612 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", - "2024-12-14 16:07:05.614 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", - "2024-12-14 16:07:05.617 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 100 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet.\n", - "2024-12-14 16:07:05.617 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.617 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet row-chunk [0-100) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet.\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.619958. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.620 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.620 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", - "2024-12-14 16:07:05.620 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:05.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002234\n", - "2024-12-14 16:07:05.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:05.619958.json\n", - "2024-12-14 16:07:05.624 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers.\n", - "2024-12-14 16:07:05.624 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to determine row count.\n", - "2024-12-14 16:07:05.626 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.569 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", + "2024-12-14 17:15:29.600 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", + "2024-12-14 17:15:29.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:29.640 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.071693\n", + "2024-12-14 17:15:29.640 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:29.568485.json\n", + "2024-12-14 17:15:29.643 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers.\n", + "2024-12-14 17:15:29.643 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to determine row count.\n", + "2024-12-14 17:15:29.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.626 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", - "2024-12-14 16:07:05.628 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", - "2024-12-14 16:07:05.628 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1190 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv.\n", - "2024-12-14 16:07:05.628 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.628 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv row-chunk [0-1190) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet.\n", - "2024-12-14 16:07:05.631 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.631167. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.631 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\n", - "2024-12-14 16:07:05.631 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", + "2024-12-14 17:15:29.646 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", + "2024-12-14 17:15:29.647 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1190 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv.\n", + "2024-12-14 17:15:29.647 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.647 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv row-chunk [0-1190) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet.\n", + "2024-12-14 17:15:29.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.649856. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\n", + "2024-12-14 17:15:29.650 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.631 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", - "2024-12-14 16:07:05.632 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", - "2024-12-14 16:07:05.632 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.633 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:05.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005445\n", - "2024-12-14 16:07:05.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:05.631167.json\n", - "2024-12-14 16:07:05.639 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd.\n", - "2024-12-14 16:07:05.639 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to determine row count.\n", - "2024-12-14 16:07:05.641 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.650 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", + "2024-12-14 17:15:29.651 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", + "2024-12-14 17:15:29.651 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.651 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:29.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005833\n", + "2024-12-14 17:15:29.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:29.649856.json\n", + "2024-12-14 17:15:29.658 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents.\n", + "2024-12-14 17:15:29.658 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to determine row count.\n", + "2024-12-14 17:15:29.660 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.641 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 16:07:05.642 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 16:07:05.642 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 722 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv.\n", - "2024-12-14 16:07:05.642 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.642 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv row-chunk [0-722) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet.\n", - "2024-12-14 16:07:05.645 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.644982. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.645 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\n", - "2024-12-14 16:07:05.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.660 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 17:15:29.764 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", + "2024-12-14 17:15:29.771 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 107727 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv.\n", + "2024-12-14 17:15:29.771 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.771 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv row-chunk [0-107727) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet.\n", + "2024-12-14 17:15:29.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.773914. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\n", + "2024-12-14 17:15:29.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 16:07:05.646 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 16:07:05.646 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.646 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:05.648 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003102\n", - "2024-12-14 16:07:05.648 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:05.644982.json\n", - "2024-12-14 16:07:05.650 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes.\n", - "2024-12-14 16:07:05.650 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to determine row count.\n", - "2024-12-14 16:07:05.652 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.652 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.652 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", - "2024-12-14 16:07:05.653 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", - "2024-12-14 16:07:05.653 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 454 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet.\n", - "2024-12-14 16:07:05.653 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.653 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet row-chunk [0-454) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet.\n", - "2024-12-14 16:07:05.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.655772. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", - "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.656 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.656 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", - "2024-12-14 16:07:05.656 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", - "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:05.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002542\n", - "2024-12-14 16:07:05.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:05.655772.json\n", - "2024-12-14 16:07:05.660 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy.\n", - "2024-12-14 16:07:05.661 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to determine row count.\n", - "2024-12-14 16:07:05.663 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 17:15:29.877 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", + "2024-12-14 17:15:29.877 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.878 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:29.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.154632\n", + "2024-12-14 17:15:29.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:29.773914.json\n", + "2024-12-14 17:15:29.931 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents.\n", + "2024-12-14 17:15:29.931 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to determine row count.\n", + "2024-12-14 17:15:29.933 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.663 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", - "2024-12-14 16:07:05.697 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", - "2024-12-14 16:07:05.699 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15306 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv.\n", - "2024-12-14 16:07:05.699 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.699 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv row-chunk [0-15306) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet.\n", - "2024-12-14 16:07:05.702 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.702392. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.702 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\n", - "2024-12-14 16:07:05.702 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.933 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 17:15:30.414 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", + "2024-12-14 17:15:30.444 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 668862 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv.\n", + "2024-12-14 17:15:30.444 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:30.444 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv row-chunk [0-668862) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet.\n", + "2024-12-14 17:15:30.448 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:30.447796. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:30.448 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\n", + "2024-12-14 17:15:30.448 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.702 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", - "2024-12-14 16:07:05.737 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", - "2024-12-14 16:07:05.737 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.737 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:05.765 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.063267\n", - "2024-12-14 16:07:05.765 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:05.702392.json\n", - "2024-12-14 16:07:05.768 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd.\n", - "2024-12-14 16:07:05.768 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to determine row count.\n", - "2024-12-14 16:07:05.770 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.770 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.770 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 16:07:05.770 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 16:07:05.771 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 4506 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet.\n", - "2024-12-14 16:07:05.771 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.771 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet row-chunk [0-4506) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet.\n", - "2024-12-14 16:07:05.773 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.773633. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.773 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", - "2024-12-14 16:07:05.773 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 16:07:05.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 16:07:05.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:05.776 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003029\n", - "2024-12-14 16:07:05.776 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:05.773633.json\n", - "2024-12-14 16:07:05.779 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents.\n", - "2024-12-14 16:07:05.779 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to determine row count.\n", - "2024-12-14 16:07:05.781 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:30.448 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 17:15:30.923 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", + "2024-12-14 17:15:30.923 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:30.923 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:31.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.664018\n", + "2024-12-14 17:15:31.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:30.447796.json\n", + "2024-12-14 17:15:31.114 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents.\n", + "2024-12-14 17:15:31.115 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to determine row count.\n", + "2024-12-14 17:15:31.117 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.781 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.842 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", - "2024-12-14 16:07:05.845 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 20404 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv.\n", - "2024-12-14 16:07:05.846 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.846 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv row-chunk [0-20404) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet.\n", - "2024-12-14 16:07:05.848 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.848754. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.849 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\n", - "2024-12-14 16:07:05.849 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.117 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", + "2024-12-14 17:15:31.117 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", + "2024-12-14 17:15:31.117 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 61 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv.\n", + "2024-12-14 17:15:31.118 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.118 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv row-chunk [0-61) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet.\n", + "2024-12-14 17:15:31.120 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.120550. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.120 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\n", + "2024-12-14 17:15:31.120 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.849 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.910 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", - "2024-12-14 16:07:05.910 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.910 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:05.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.092049\n", - "2024-12-14 16:07:05.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:05.848754.json\n", - "2024-12-14 16:07:05.944 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays.\n", - "2024-12-14 16:07:05.944 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to determine row count.\n", - "2024-12-14 16:07:05.946 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", + "2024-12-14 17:15:31.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", + "2024-12-14 17:15:31.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:31.122 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002100\n", + "2024-12-14 17:15:31.122 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:31.120550.json\n", + "2024-12-14 17:15:31.124 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions.\n", + "2024-12-14 17:15:31.125 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to determine row count.\n", + "2024-12-14 17:15:31.127 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", - "2024-12-14 16:07:05.947 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 140 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv.\n", - "2024-12-14 16:07:05.947 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.947 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv row-chunk [0-140) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet.\n", - "2024-12-14 16:07:05.950 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.950272. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.950 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\n", - "2024-12-14 16:07:05.950 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.127 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", + "2024-12-14 17:15:31.127 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", + "2024-12-14 17:15:31.128 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 275 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv.\n", + "2024-12-14 17:15:31.128 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.128 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv row-chunk [0-275) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet.\n", + "2024-12-14 17:15:31.130 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.130847. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.131 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\n", + "2024-12-14 17:15:31.131 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.950 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", - "2024-12-14 16:07:05.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:05.953 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002913\n", - "2024-12-14 16:07:05.953 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:05.950272.json\n", - "2024-12-14 16:07:05.955 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents.\n", - "2024-12-14 16:07:05.955 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to determine row count.\n", - "2024-12-14 16:07:05.957 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.131 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", + "2024-12-14 17:15:31.131 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", + "2024-12-14 17:15:31.131 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.131 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:31.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004670\n", + "2024-12-14 17:15:31.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:31.130847.json\n", + "2024-12-14 17:15:31.137 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents.\n", + "2024-12-14 17:15:31.138 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to determine row count.\n", + "2024-12-14 17:15:31.141 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.958 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", - "2024-12-14 16:07:05.958 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", - "2024-12-14 16:07:05.958 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 61 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv.\n", - "2024-12-14 16:07:05.958 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.958 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv row-chunk [0-61) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet.\n", - "2024-12-14 16:07:05.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.960658. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\n", - "2024-12-14 16:07:05.961 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.141 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:31.177 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", + "2024-12-14 17:15:31.180 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 20404 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv.\n", + "2024-12-14 17:15:31.180 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.180 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv row-chunk [0-20404) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet.\n", + "2024-12-14 17:15:31.183 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.183076. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.183 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\n", + "2024-12-14 17:15:31.183 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.961 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", - "2024-12-14 16:07:05.961 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", - "2024-12-14 16:07:05.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:05.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.001616\n", - "2024-12-14 16:07:05.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:05.960658.json\n", - "2024-12-14 16:07:05.964 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr.\n", - "2024-12-14 16:07:05.965 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to determine row count.\n", - "2024-12-14 16:07:05.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.183 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:31.220 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", + "2024-12-14 17:15:31.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:31.240 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.057787\n", + "2024-12-14 17:15:31.240 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:31.183076.json\n", + "2024-12-14 17:15:31.244 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd.\n", + "2024-12-14 17:15:31.244 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to determine row count.\n", + "2024-12-14 17:15:31.246 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:31.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:31.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 17:15:31.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 17:15:31.247 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 4506 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet.\n", + "2024-12-14 17:15:31.247 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.247 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet row-chunk [0-4506) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet.\n", + "2024-12-14 17:15:31.249 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.249669. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.249 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", + "2024-12-14 17:15:31.250 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:31.250 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:31.250 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 17:15:31.250 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 17:15:31.250 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.250 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:31.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003216\n", + "2024-12-14 17:15:31.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:31.249669.json\n", + "2024-12-14 17:15:31.255 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy.\n", + "2024-12-14 17:15:31.255 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to determine row count.\n", + "2024-12-14 17:15:31.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", - "2024-12-14 16:07:05.968 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", - "2024-12-14 16:07:05.969 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2964 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv.\n", - "2024-12-14 16:07:05.969 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.969 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv row-chunk [0-2964) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet.\n", - "2024-12-14 16:07:05.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.971901. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\n", - "2024-12-14 16:07:05.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", + "2024-12-14 17:15:31.278 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", + "2024-12-14 17:15:31.280 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15306 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv.\n", + "2024-12-14 17:15:31.280 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.280 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv row-chunk [0-15306) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet.\n", + "2024-12-14 17:15:31.283 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.283277. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.283 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\n", + "2024-12-14 17:15:31.283 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", - "2024-12-14 16:07:05.974 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", - "2024-12-14 16:07:05.974 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.974 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:05.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006211\n", - "2024-12-14 16:07:05.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:05.971901.json\n", - "2024-12-14 16:07:05.980 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents.\n", - "2024-12-14 16:07:05.980 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to determine row count.\n", - "2024-12-14 16:07:05.982 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.283 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", + "2024-12-14 17:15:31.304 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", + "2024-12-14 17:15:31.304 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:31.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.054084\n", + "2024-12-14 17:15:31.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:31.283277.json\n", + "2024-12-14 17:15:31.340 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr.\n", + "2024-12-14 17:15:31.340 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to determine row count.\n", + "2024-12-14 17:15:31.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.983 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 16:07:06.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", - "2024-12-14 16:07:06.161 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 107727 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv.\n", - "2024-12-14 16:07:06.161 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:06.161 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv row-chunk [0-107727) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet.\n", - "2024-12-14 16:07:06.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:06.164674. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:06.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\n", - "2024-12-14 16:07:06.165 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", + "2024-12-14 17:15:31.343 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", + "2024-12-14 17:15:31.344 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2964 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv.\n", + "2024-12-14 17:15:31.344 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.344 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv row-chunk [0-2964) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet.\n", + "2024-12-14 17:15:31.346 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.346587. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.346 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\n", + "2024-12-14 17:15:31.346 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:06.165 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 16:07:06.337 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", - "2024-12-14 16:07:06.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:06.338 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:06.384 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.220152\n", - "2024-12-14 16:07:06.384 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:06.164674.json\n", - "2024-12-14 16:07:06.385 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:02.594945\n", + "2024-12-14 17:15:31.347 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", + "2024-12-14 17:15:31.348 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", + "2024-12-14 17:15:31.348 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.348 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:31.352 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005502\n", + "2024-12-14 17:15:31.352 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:31.346587.json\n", + "2024-12-14 17:15:31.352 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:01.985358\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:06.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:06.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:07.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:31.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:31.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:32.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:07.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:07.037 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:32.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:31.960 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -940,9 +936,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -958,10 +954,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: split_and_shard_subjects\n", @@ -984,15 +976,15 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:07.051 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:31.974 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//metadata\n", "n_subjects_per_shard: 1000\n", "external_splits_json_fp: null\n", "split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", @@ -1002,8 +994,8 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", - "2024-12-14 16:07:07.051 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml (needed for subject ID columns)\n", - "2024-12-14 16:07:07.082 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", + "2024-12-14 17:15:31.975 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml (needed for subject ID columns)\n", + "2024-12-14 17:15:32.007 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", "subject_id_col: subject_id\n", "hosp/admissions:\n", " ed_registration:\n", @@ -1326,53 +1318,53 @@ " valueuom: unitname\n", " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", "\n", - "2024-12-14 16:07:07.083 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/admissions files:\n", + "2024-12-14 17:15:32.008 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/admissions files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.084 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/diagnoses_icd files:\n", + "2024-12-14 17:15:32.009 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/diagnoses_icd files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.084 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/drgcodes files:\n", + "2024-12-14 17:15:32.009 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/drgcodes files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:07.084 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/emar files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/emar files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/hcpcsevents files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/hcpcsevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/labevents files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/labevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/omr files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/omr files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/patients files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/patients files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/pharmacy files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/pharmacy files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/procedures_icd files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/procedures_icd files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/transfers files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/transfers files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/icustays files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/icustays files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/chartevents files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/chartevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/procedureevents files:\n", + "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/procedureevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.087 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/inputevents files:\n", + "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/inputevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:07.087 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/outputevents files:\n", + "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/outputevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.087 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 16 dataframes\n", - "2024-12-14 16:07:07.125 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 100 unique subject IDs of type int64\n", - "2024-12-14 16:07:07.128 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", - "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 80 subjects.\n", - "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 10 subjects.\n", - "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 10 subjects.\n", - "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/.shards.json\n", - "2024-12-14 16:07:07.167 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", + "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 16 dataframes\n", + "2024-12-14 17:15:32.052 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 100 unique subject IDs of type int64\n", + "2024-12-14 17:15:32.056 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", + "2024-12-14 17:15:32.101 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 50 subjects.\n", + "2024-12-14 17:15:32.101 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 25 subjects.\n", + "2024-12-14 17:15:32.101 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 25 subjects.\n", + "2024-12-14 17:15:32.102 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/.shards.json\n", + "2024-12-14 17:15:32.102 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:07.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:07.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:08.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:32.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:32.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:33.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:08.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:07.767 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:33.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:32.724 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -1401,9 +1393,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -1419,10 +1411,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: convert_to_sharded_events\n", @@ -1445,7 +1433,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:07.780 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:32.738 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_dedup_text_and_numeric: true\n", "is_metadata: false\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", @@ -1457,9 +1445,9 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:07:07.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", - "2024-12-14 16:07:07.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", - "2024-12-14 16:07:07.811 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", + "2024-12-14 17:15:32.739 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", + "2024-12-14 17:15:32.739 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", + "2024-12-14 17:15:32.771 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", "subject_id_col: subject_id\n", "hosp/admissions:\n", " ed_registration:\n", @@ -1782,19 +1770,36 @@ " valueuom: unitname\n", " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", "\n", - "2024-12-14 16:07:07.820 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.821 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.821527. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.822 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.823 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:32.781 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.781478. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:32.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:802: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " typed_subjects = pl.Series(subjects, dtype=df.schema[input_subject_id_column])\n", - "2024-12-14 16:07:07.829 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.834 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 17:15:32.790 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:32.794 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:468: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if col not in df.schema:\n", - "2024-12-14 16:07:07.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", - "2024-12-14 16:07:07.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", - "2024-12-14 16:07:07.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", + "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:32.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.044811\n", + "2024-12-14 17:15:32.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:32.781478.json\n", + "2024-12-14 17:15:32.827 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:32.827 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.827805. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:32.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.828 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:32.832 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:513: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if v not in df.schema:\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:517: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", @@ -1803,721 +1808,704 @@ " is_str = df.schema[v] == pl.Utf8\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:519: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " is_cat = isinstance(df.schema[v], pl.Categorical)\n", - "2024-12-14 16:07:07.836 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.836 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.859 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.037877\n", - "2024-12-14 16:07:07.859 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:07.821527.json\n", - "2024-12-14 16:07:07.859 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.860108. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", - "2024-12-14 16:07:07.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", - "2024-12-14 16:07:07.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:07.865 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:32.838 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010302\n", + "2024-12-14 17:15:32.838 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:32.827805.json\n", + "2024-12-14 17:15:32.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.838982. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:32.845 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006657\n", + "2024-12-14 17:15:32.845 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:32.838982.json\n", + "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.846632. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.847 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:32.850 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.852 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.852 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:32.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008629\n", + "2024-12-14 17:15:32.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:32.846632.json\n", + "2024-12-14 17:15:32.855 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.856164. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:32.860 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 17:15:32.860 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:32.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007601\n", + "2024-12-14 17:15:32.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:32.856164.json\n", + "2024-12-14 17:15:32.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:32.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.865354. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:32.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:32.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 17:15:32.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.873 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.873 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 17:15:32.873 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.874 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.874 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:32.888 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023607\n", + "2024-12-14 17:15:32.889 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:32.865354.json\n", + "2024-12-14 17:15:32.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.889940. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:32.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:32.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007572\n", + "2024-12-14 17:15:32.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:32.889940.json\n", + "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.898624. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.899 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:32.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:529: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " case \"text_value\" if not df.schema[v] == pl.Utf8:\n", - "2024-12-14 16:07:07.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", - "2024-12-14 16:07:07.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.868 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008277\n", - "2024-12-14 16:07:07.868 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:07.860108.json\n", - "2024-12-14 16:07:07.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.869972. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.874 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", - "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", - "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013825\n", - "2024-12-14 16:07:07.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:07.869972.json\n", - "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.884587. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.885 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.888 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.891 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006845\n", - "2024-12-14 16:07:07.891 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:07.884587.json\n", - "2024-12-14 16:07:07.891 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.892220. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", - "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", - "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.897 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.902 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009786\n", - "2024-12-14 16:07:07.902 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:07.892220.json\n", - "2024-12-14 16:07:07.902 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.903054. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", - "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.918 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015170\n", - "2024-12-14 16:07:07.918 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:07.903054.json\n", - "2024-12-14 16:07:07.918 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.919077. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", - "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009003\n", - "2024-12-14 16:07:07.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:07.919077.json\n", - "2024-12-14 16:07:07.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.928924. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", - "2024-12-14 16:07:07.934 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:07.934 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:07.934 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.935 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.968 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.039739\n", - "2024-12-14 16:07:07.968 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:07.928924.json\n", - "2024-12-14 16:07:07.969 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.969832. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.974 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008660\n", - "2024-12-14 16:07:07.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:07.969832.json\n", - "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.979455. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.980 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.983 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", - "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", - "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.991 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011555\n", - "2024-12-14 16:07:07.991 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:07.979455.json\n", - "2024-12-14 16:07:07.991 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.991934. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006670\n", - "2024-12-14 16:07:07.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:07.991934.json\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.999367. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", - "2024-12-14 16:07:08.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", - "2024-12-14 16:07:08.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", - "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", - "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.006 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006814\n", - "2024-12-14 16:07:08.006 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:07.999367.json\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.007341. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", - "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.014 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006829\n", - "2024-12-14 16:07:08.014 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:08.007341.json\n", - "2024-12-14 16:07:08.014 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.015034. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 16:07:08.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.020 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.020 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.026 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011136\n", - "2024-12-14 16:07:08.026 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:08.015034.json\n", - "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.027619. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.028 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.032 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", - "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", - "2024-12-14 16:07:08.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016992\n", - "2024-12-14 16:07:08.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:08.027619.json\n", - "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.045586. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", - "2024-12-14 16:07:08.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", - "2024-12-14 16:07:08.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008874\n", - "2024-12-14 16:07:08.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:08.045586.json\n", - "2024-12-14 16:07:08.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.055324. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", - "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", - "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", - "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.060 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.061 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006326\n", - "2024-12-14 16:07:08.061 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:08.055324.json\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.062419. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.066 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006074\n", - "2024-12-14 16:07:08.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:08.062419.json\n", - "2024-12-14 16:07:08.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.069888. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.074 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", - "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", - "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.079 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009659\n", - "2024-12-14 16:07:08.079 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:08.069888.json\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.080456. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.084 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 16:07:08.084 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006417\n", - "2024-12-14 16:07:08.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:08.080456.json\n", - "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.087668. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009201\n", - "2024-12-14 16:07:08.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:08.087668.json\n", - "2024-12-14 16:07:08.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.098026. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", - "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", - "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010742\n", - "2024-12-14 16:07:08.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:08.098026.json\n", - "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.109557. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.113 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.117 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008429\n", - "2024-12-14 16:07:08.118 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:08.109557.json\n", - "2024-12-14 16:07:08.118 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.118982. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.123 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", - "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024349\n", - "2024-12-14 16:07:08.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:08.118982.json\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.144266. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008152\n", - "2024-12-14 16:07:08.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:08.144266.json\n", - "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.153615. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.154 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", - "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", - "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010973\n", - "2024-12-14 16:07:08.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:08.153615.json\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.165366. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", - "2024-12-14 16:07:08.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", - "2024-12-14 16:07:08.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.170 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.170 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.171 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005897\n", - "2024-12-14 16:07:08.171 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:08.165366.json\n", - "2024-12-14 16:07:08.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.172125. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.177 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.177 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.178 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006754\n", - "2024-12-14 16:07:08.178 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:08.172125.json\n", - "2024-12-14 16:07:08.179 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.179926. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", - "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.185 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.185 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.186 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006649\n", - "2024-12-14 16:07:08.186 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:08.179926.json\n", - "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.187501. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.188 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010020\n", - "2024-12-14 16:07:08.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:08.187501.json\n", - "2024-12-14 16:07:08.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.198888. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", - "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", - "2024-12-14 16:07:08.205 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.205 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014441\n", - "2024-12-14 16:07:08.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:08.198888.json\n", - "2024-12-14 16:07:08.213 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.214164. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.222 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007988\n", - "2024-12-14 16:07:08.222 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:08.214164.json\n", - "2024-12-14 16:07:08.222 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.223318. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", - "2024-12-14 16:07:08.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", - "2024-12-14 16:07:08.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", - "2024-12-14 16:07:08.228 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.228 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.228 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006777\n", - "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:08.223318.json\n", - "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.230725. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.231 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.231 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.234 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", - "2024-12-14 16:07:08.234 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", - "2024-12-14 16:07:08.234 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.235 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", - "2024-12-14 16:07:08.235 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", - "2024-12-14 16:07:08.235 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.235 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006615\n", - "2024-12-14 16:07:08.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:08.230725.json\n", - "2024-12-14 16:07:08.238 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.238 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.238859. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.239 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.239 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.239 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", - "2024-12-14 16:07:08.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.257 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017998\n", - "2024-12-14 16:07:08.257 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:08.238859.json\n", - "2024-12-14 16:07:08.260 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.262 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.261546. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.263 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.263 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.264 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.281 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024554\n", - "2024-12-14 16:07:08.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:08.261546.json\n", - "2024-12-14 16:07:08.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.287021. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", - "2024-12-14 16:07:08.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", - "2024-12-14 16:07:08.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.292 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.310 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023385\n", - "2024-12-14 16:07:08.310 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:08.287021.json\n", - "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.311468. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.312 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.318 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012409\n", - "2024-12-14 16:07:08.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:08.311468.json\n", - "2024-12-14 16:07:08.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.324 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.324714. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.324 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.325 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", - "2024-12-14 16:07:08.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.330 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008220\n", - "2024-12-14 16:07:08.333 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:08.324714.json\n", - "2024-12-14 16:07:08.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.333 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.333867. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.338 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.544 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.210303\n", - "2024-12-14 16:07:08.544 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:08.333867.json\n", - "2024-12-14 16:07:08.544 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.545351. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.546 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.550 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011422\n", - "2024-12-14 16:07:08.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:08.545351.json\n", - "2024-12-14 16:07:08.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.557861. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.562 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", - "2024-12-14 16:07:08.562 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.562 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", - "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.564 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.564 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.581 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023592\n", - "2024-12-14 16:07:08.581 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:08.557861.json\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.582313. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.586 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", - "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", - "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.588 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006400\n", - "2024-12-14 16:07:08.588 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:08.582313.json\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.589478. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.593 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", - "2024-12-14 16:07:08.593 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.596 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007142\n", - "2024-12-14 16:07:08.596 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:08.589478.json\n", - "2024-12-14 16:07:08.597 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.597 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.597686. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.597 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.598 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.598 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", - "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.603 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.605 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007402\n", - "2024-12-14 16:07:08.605 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:08.597686.json\n", - "2024-12-14 16:07:08.605 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.606130. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.610 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.642 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.036436\n", - "2024-12-14 16:07:08.642 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:08.606130.json\n", - "2024-12-14 16:07:08.643 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.644292. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.649 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", - "2024-12-14 16:07:08.649 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.649 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.650 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.650 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", - "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", - "2024-12-14 16:07:08.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.652 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.676 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.032631\n", - "2024-12-14 16:07:08.677 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:08.644292.json\n", - "2024-12-14 16:07:08.678 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.678 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.678769. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.679 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.679 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.679 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", - "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", - "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", - "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", - "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.686 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008114\n", - "2024-12-14 16:07:08.686 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:08.678769.json\n", - "2024-12-14 16:07:08.687 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", + "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.905 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:32.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017034\n", + "2024-12-14 17:15:32.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:32.898624.json\n", + "2024-12-14 17:15:32.916 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.917013. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:32.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:32.926 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009541\n", + "2024-12-14 17:15:32.926 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:32.917013.json\n", + "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.927549. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:32.931 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:32.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013041\n", + "2024-12-14 17:15:32.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:32.927549.json\n", + "2024-12-14 17:15:32.941 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:32.941 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.941739. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:32.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.942 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:32.955 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013267\n", + "2024-12-14 17:15:32.955 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:32.941739.json\n", + "2024-12-14 17:15:32.955 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.955912. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:32.973 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 17:15:32.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 17:15:32.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 17:15:32.977 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.978 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:32.981 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025516\n", + "2024-12-14 17:15:32.981 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:32.955912.json\n", + "2024-12-14 17:15:32.983 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:32.986 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.984685. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.987 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:32.987 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.987 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:32.991 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.056118\n", + "2024-12-14 17:15:33.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:32.984685.json\n", + "2024-12-14 17:15:33.041 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.041 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.041805. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.042 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.042 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.042 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.046 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", + "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 17:15:33.047 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007480\n", + "2024-12-14 17:15:33.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:33.041805.json\n", + "2024-12-14 17:15:33.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.050716. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.055 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008712\n", + "2024-12-14 17:15:33.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:33.050716.json\n", + "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.060372. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.061 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.071 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011269\n", + "2024-12-14 17:15:33.071 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:33.060372.json\n", + "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.072660. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.073 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007929\n", + "2024-12-14 17:15:33.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:33.072660.json\n", + "2024-12-14 17:15:33.081 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.081784. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.082 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.089 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007410\n", + "2024-12-14 17:15:33.089 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:33.081784.json\n", + "2024-12-14 17:15:33.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.090034. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 17:15:33.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 17:15:33.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.095 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.095 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006395\n", + "2024-12-14 17:15:33.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:33.090034.json\n", + "2024-12-14 17:15:33.096 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.097353. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.101 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007910\n", + "2024-12-14 17:15:33.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:33.097353.json\n", + "2024-12-14 17:15:33.105 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.106087. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 17:15:33.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.113 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006962\n", + "2024-12-14 17:15:33.113 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:33.106087.json\n", + "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.114649. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.115 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.119 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 17:15:33.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020059\n", + "2024-12-14 17:15:33.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:33.114649.json\n", + "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.135615. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.136 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.142 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007186\n", + "2024-12-14 17:15:33.142 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:33.135615.json\n", + "2024-12-14 17:15:33.143 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.143737. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.163 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020243\n", + "2024-12-14 17:15:33.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:33.143737.json\n", + "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.165607. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.166 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.170 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.175 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009748\n", + "2024-12-14 17:15:33.175 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:33.165607.json\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.176289. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.182 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.182 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.190 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014067\n", + "2024-12-14 17:15:33.190 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:33.176289.json\n", + "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.191592. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.202 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011248\n", + "2024-12-14 17:15:33.202 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:33.191592.json\n", + "2024-12-14 17:15:33.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.203 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.203695. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.203 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.204 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.207 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006506\n", + "2024-12-14 17:15:33.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:33.203695.json\n", + "2024-12-14 17:15:33.210 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.211185. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.215 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.075577\n", + "2024-12-14 17:15:33.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:33.211185.json\n", + "2024-12-14 17:15:33.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.287673. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.292 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.294 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007073\n", + "2024-12-14 17:15:33.294 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:33.287673.json\n", + "2024-12-14 17:15:33.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.295 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.295797. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.296 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.296 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.304 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008910\n", + "2024-12-14 17:15:33.304 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:33.295797.json\n", + "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.305567. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.306 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.310 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013312\n", + "2024-12-14 17:15:33.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:33.305567.json\n", + "2024-12-14 17:15:33.319 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.319931. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 17:15:33.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.325 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.328 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008224\n", + "2024-12-14 17:15:33.328 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:33.319931.json\n", + "2024-12-14 17:15:33.328 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.329021. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.336 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006998\n", + "2024-12-14 17:15:33.336 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:33.329021.json\n", + "2024-12-14 17:15:33.336 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.337131. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.341 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.344 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007560\n", + "2024-12-14 17:15:33.344 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:33.337131.json\n", + "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.345648. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.346 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 17:15:33.351 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 17:15:33.351 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.351 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.352 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.363 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017417\n", + "2024-12-14 17:15:33.363 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:33.345648.json\n", + "2024-12-14 17:15:33.366 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.369 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.368406. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.371 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.371 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.373 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.386 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.390 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.022108\n", + "2024-12-14 17:15:33.390 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:33.368406.json\n", + "2024-12-14 17:15:33.391 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.392174. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.397 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 17:15:33.399 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.399 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.400 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.400 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 17:15:33.400 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.401 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.401 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.420 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028113\n", + "2024-12-14 17:15:33.420 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:33.392174.json\n", + "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.421362. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.422 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.429 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008179\n", + "2024-12-14 17:15:33.429 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:33.421362.json\n", + "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.430538. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.431 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.437 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.437 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028130\n", + "2024-12-14 17:15:33.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:33.430538.json\n", + "2024-12-14 17:15:33.459 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.460250. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.465 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.467 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.467 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.467 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010316\n", + "2024-12-14 17:15:33.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:33.460250.json\n", + "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.471585. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.472 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019048\n", + "2024-12-14 17:15:33.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:33.471585.json\n", + "2024-12-14 17:15:33.491 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.491 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.491829. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.492 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.492 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.492 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.503 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011900\n", + "2024-12-14 17:15:33.503 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:33.491829.json\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.504429. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.509 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.509 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.510 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006418\n", + "2024-12-14 17:15:33.510 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:33.504429.json\n", + "2024-12-14 17:15:33.511 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.511839. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.517 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.517 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.649 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.137863\n", + "2024-12-14 17:15:33.649 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:33.511839.json\n", + "2024-12-14 17:15:33.650 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.650624. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.651 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.655 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007397\n", + "2024-12-14 17:15:33.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:33.650624.json\n", + "2024-12-14 17:15:33.658 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.659087. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.663 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010815\n", + "2024-12-14 17:15:33.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:33.659087.json\n", + "2024-12-14 17:15:33.670 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.670 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.670832. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.671 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.671 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.671 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.688 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018080\n", + "2024-12-14 17:15:33.689 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:33.670832.json\n", + "2024-12-14 17:15:33.689 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:08.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:08.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:10.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:33.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:33.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:35.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:10.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:09.354 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:35.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:34.367 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -2546,9 +2534,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -2564,10 +2552,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: merge_to_MEDS_cohort\n", @@ -2590,7 +2574,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:09.370 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:34.382 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "unique_by: '*'\n", "additional_sort_by: null\n", "is_metadata: false\n", @@ -2603,11 +2587,11 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:07:09.385 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 16:07:09.393 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 16:07:09.394 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:09.393828. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:09.395 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0\n", - "2024-12-14 16:07:09.396 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + "2024-12-14 17:15:34.397 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 17:15:34.404 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:15:34.405 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:34.405491. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:34.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0\n", + "2024-12-14 17:15:34.408 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", @@ -2624,14 +2608,14 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:09.405 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:09.405 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 16:07:09.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.066957\n", - "2024-12-14 16:07:09.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T16:07:09.393828.json\n", - "2024-12-14 16:07:09.461 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:07:09.461 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:09.461748. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:09.462 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0\n", - "2024-12-14 16:07:09.463 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + "2024-12-14 17:15:34.417 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:34.417 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:15:34.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.130660\n", + "2024-12-14 17:15:34.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T17:15:34.405491.json\n", + "2024-12-14 17:15:34.536 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:15:34.537 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:34.537340. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:34.537 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0\n", + "2024-12-14 17:15:34.538 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", @@ -2648,14 +2632,14 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:09.464 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:09.465 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:07:09.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.050432\n", - "2024-12-14 16:07:09.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T16:07:09.461748.json\n", - "2024-12-14 16:07:09.512 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:07:09.513 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:09.513190. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:09.513 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0\n", - "2024-12-14 16:07:09.514 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + "2024-12-14 17:15:34.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:34.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:15:34.716 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.178790\n", + "2024-12-14 17:15:34.716 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T17:15:34.537340.json\n", + "2024-12-14 17:15:34.716 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:15:34.717 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:34.717118. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:34.717 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0\n", + "2024-12-14 17:15:34.718 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", @@ -2672,18 +2656,18 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:09.516 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:09.516 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:07:10.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.610197\n", - "2024-12-14 16:07:10.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T16:07:09.513190.json\n", - "2024-12-14 16:07:10.123 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.753831\n", + "2024-12-14 17:15:34.720 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:34.720 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:15:35.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.364595\n", + "2024-12-14 17:15:35.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T17:15:34.717118.json\n", + "2024-12-14 17:15:35.082 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.699477\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:10.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: extract_code_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:10.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-extract_code_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=extract_code_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:11.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:35.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: extract_code_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:35.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-extract_code_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=extract_code_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:36.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:11.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:10.771 | INFO | MEDS_transforms.utils:stage_init:73 - Running extract_code_metadata with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:36.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:35.785 | INFO | MEDS_transforms.utils:stage_init:73 - Running extract_code_metadata with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -2712,9 +2696,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -2730,10 +2714,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: extract_code_metadata\n", @@ -2756,7 +2736,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:10.787 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:35.802 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "description_separator: '\n", "\n", @@ -2771,8 +2751,8 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:07:10.788 | INFO | MEDS_transforms.extract.extract_code_metadata:main:359 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", - "2024-12-14 16:07:10.819 | INFO | MEDS_transforms.extract.extract_code_metadata:main:361 - Event conversion config:\n", + "2024-12-14 17:15:35.803 | INFO | MEDS_transforms.extract.extract_code_metadata:main:359 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", + "2024-12-14 17:15:35.835 | INFO | MEDS_transforms.extract.extract_code_metadata:main:361 - Event conversion config:\n", "subject_id_col: subject_id\n", "hosp/admissions:\n", " ed_registration:\n", @@ -3095,107 +3075,107 @@ " valueuom: unitname\n", " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", "\n", - "2024-12-14 16:07:10.871 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 16:07:10.871 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 16:07:10.872 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.872428. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:10.874 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 16:07:10.874 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:35.887 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", + "2024-12-14 17:15:35.887 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", + "2024-12-14 17:15:35.889 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.888743. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:35.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", + "2024-12-14 17:15:35.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/extract_code_metadata.py:184: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if metadata_df.schema[mandatory_col] is not mandatory_type:\n", - "2024-12-14 16:07:10.880 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:10.881 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:10.881 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 16:07:10.932 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.059924\n", - "2024-12-14 16:07:10.932 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache/locks/2024-12-14T16:07:10.872428.json\n", - "2024-12-14 16:07:10.933 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\n", - "2024-12-14 16:07:10.933 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", - "2024-12-14 16:07:10.933 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.933340. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:10.933 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\n", - "2024-12-14 16:07:10.934 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:10.936 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:10.936 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", - "2024-12-14 16:07:10.967 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.034517\n", - "2024-12-14 16:07:10.967 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache/locks/2024-12-14T16:07:10.933340.json\n", - "2024-12-14 16:07:10.968 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", - "2024-12-14 16:07:10.968 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", - "2024-12-14 16:07:10.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.969008. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:10.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", - "2024-12-14 16:07:10.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:10.972 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:10.972 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:10.973 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:10.974 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:10.974 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", - "2024-12-14 16:07:10.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.027039\n", - "2024-12-14 16:07:10.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache/locks/2024-12-14T16:07:10.969008.json\n", - "2024-12-14 16:07:10.996 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", - "2024-12-14 16:07:10.996 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", - "2024-12-14 16:07:10.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.997044. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:10.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", - "2024-12-14 16:07:10.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.001 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.001 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.001 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", - "2024-12-14 16:07:11.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.031820\n", - "2024-12-14 16:07:11.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache/locks/2024-12-14T16:07:10.997044.json\n", - "2024-12-14 16:07:11.029 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", - "2024-12-14 16:07:11.029 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", - "2024-12-14 16:07:11.029 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.029852. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:11.030 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", - "2024-12-14 16:07:11.030 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.032 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.032 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.033 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.033 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", - "2024-12-14 16:07:11.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014199\n", - "2024-12-14 16:07:11.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache/locks/2024-12-14T16:07:11.029852.json\n", - "2024-12-14 16:07:11.044 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", - "2024-12-14 16:07:11.044 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", - "2024-12-14 16:07:11.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.045049. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:11.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", - "2024-12-14 16:07:11.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.047 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.047 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.048 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.048 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.048 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", - "2024-12-14 16:07:11.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011186\n", - "2024-12-14 16:07:11.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache/locks/2024-12-14T16:07:11.045049.json\n", - "2024-12-14 16:07:11.056 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", - "2024-12-14 16:07:11.056 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", - "2024-12-14 16:07:11.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.057011. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:11.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", - "2024-12-14 16:07:11.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.059 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.059 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", - "2024-12-14 16:07:11.061 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004929\n", - "2024-12-14 16:07:11.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache/locks/2024-12-14T16:07:11.057011.json\n", - "2024-12-14 16:07:11.062 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", - "2024-12-14 16:07:11.062 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", - "2024-12-14 16:07:11.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.062871. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:11.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", - "2024-12-14 16:07:11.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.065 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.065 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.066 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.066 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009717\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache/locks/2024-12-14T16:07:11.062871.json\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.extract.extract_code_metadata:main:398 - Extracted metadata for all events. Merging.\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.extract.extract_code_metadata:main:404 - Starting reduction process\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.extract.extract_code_metadata:main:412 - All map shards complete! Starting code metadata reduction computation.\n", - "2024-12-14 16:07:11.089 | INFO | MEDS_transforms.extract.extract_code_metadata:main:424 - Collected metadata for 2661 unique codes among 42898 total observations.\n", - "2024-12-14 16:07:11.124 | INFO | MEDS_transforms.extract.extract_code_metadata:main:449 - Finished reduction in 0:00:00.051941\n", + "2024-12-14 17:15:35.898 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.899 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:35.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", + "2024-12-14 17:15:35.946 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.057475\n", + "2024-12-14 17:15:35.946 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache/locks/2024-12-14T17:15:35.888743.json\n", + "2024-12-14 17:15:35.947 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", + "2024-12-14 17:15:35.947 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", + "2024-12-14 17:15:35.947 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.947489. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:35.947 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", + "2024-12-14 17:15:35.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:35.951 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.951 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:35.953 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.953 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:35.953 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", + "2024-12-14 17:15:35.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.022885\n", + "2024-12-14 17:15:35.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache/locks/2024-12-14T17:15:35.947489.json\n", + "2024-12-14 17:15:35.971 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", + "2024-12-14 17:15:35.971 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", + "2024-12-14 17:15:35.971 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.971762. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:35.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", + "2024-12-14 17:15:35.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:35.973 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.974 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:35.975 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.975 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:35.975 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", + "2024-12-14 17:15:35.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010768\n", + "2024-12-14 17:15:35.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache/locks/2024-12-14T17:15:35.971762.json\n", + "2024-12-14 17:15:35.983 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\n", + "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", + "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.983359. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\n", + "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:35.985 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.985 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", + "2024-12-14 17:15:36.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.032007\n", + "2024-12-14 17:15:36.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache/locks/2024-12-14T17:15:35.983359.json\n", + "2024-12-14 17:15:36.016 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.016392. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:36.019 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.019 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.019 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 17:15:36.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.035722\n", + "2024-12-14 17:15:36.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache/locks/2024-12-14T17:15:36.016392.json\n", + "2024-12-14 17:15:36.053 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", + "2024-12-14 17:15:36.053 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", + "2024-12-14 17:15:36.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.053647. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:36.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", + "2024-12-14 17:15:36.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:36.056 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.057 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.058 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.058 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", + "2024-12-14 17:15:36.077 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023464\n", + "2024-12-14 17:15:36.077 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache/locks/2024-12-14T17:15:36.053647.json\n", + "2024-12-14 17:15:36.077 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", + "2024-12-14 17:15:36.077 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", + "2024-12-14 17:15:36.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.078096. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:36.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", + "2024-12-14 17:15:36.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:36.080 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.080 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", + "2024-12-14 17:15:36.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005380\n", + "2024-12-14 17:15:36.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache/locks/2024-12-14T17:15:36.078096.json\n", + "2024-12-14 17:15:36.084 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", + "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", + "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.084421. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", + "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:36.086 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.086 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.088 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.088 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.088 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011606\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache/locks/2024-12-14T17:15:36.084421.json\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.extract.extract_code_metadata:main:398 - Extracted metadata for all events. Merging.\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.extract.extract_code_metadata:main:404 - Starting reduction process\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.extract.extract_code_metadata:main:412 - All map shards complete! Starting code metadata reduction computation.\n", + "2024-12-14 17:15:36.113 | INFO | MEDS_transforms.extract.extract_code_metadata:main:424 - Collected metadata for 2661 unique codes among 42898 total observations.\n", + "2024-12-14 17:15:36.151 | INFO | MEDS_transforms.extract.extract_code_metadata:main:449 - Finished reduction in 0:00:00.054648\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:11.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:11.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:12.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:36.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:36.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:37.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:12.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:11.807 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:37.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:36.869 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -3224,9 +3204,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -3242,10 +3222,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: finalize_MEDS_metadata\n", @@ -3268,7 +3244,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:11.825 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:36.888 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "do_retype: true\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", @@ -3281,23 +3257,23 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/finalize_MEDS_metadata\n", " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", - "2024-12-14 16:07:11.830 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", - "2024-12-14 16:07:11.830 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:173 - Reading code metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/codes.parquet\n", - "2024-12-14 16:07:12.007 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/codes.parquet\n", - "2024-12-14 16:07:12.016 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", - "2024-12-14 16:07:12.019 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/dataset.json\n", - "2024-12-14 16:07:12.019 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", - "2024-12-14 16:07:12.019 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 80 subjects\n", - "2024-12-14 16:07:12.020 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 10 subjects\n", - "2024-12-14 16:07:12.020 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 10 subjects\n", - "2024-12-14 16:07:12.022 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/subject_splits.parquet\n", + "2024-12-14 17:15:36.892 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", + "2024-12-14 17:15:36.892 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:173 - Reading code metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/codes.parquet\n", + "2024-12-14 17:15:37.079 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/codes.parquet\n", + "2024-12-14 17:15:37.091 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", + "2024-12-14 17:15:37.093 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/dataset.json\n", + "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", + "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 50 subjects\n", + "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 25 subjects\n", + "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 25 subjects\n", + "2024-12-14 17:15:37.096 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/subject_splits.parquet\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:12.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:12.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:13.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:37.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:37.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:38.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:13.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:12.623 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:38.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:37.714 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -3326,9 +3302,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -3344,10 +3320,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: finalize_MEDS_data\n", @@ -3370,7 +3342,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:12.640 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:37.732 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_retype: true\n", "is_metadata: false\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", @@ -3382,29 +3354,29 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", - "2024-12-14 16:07:12.658 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 16:07:12.667 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", - "2024-12-14 16:07:12.668 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:12.668203. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:12.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 16:07:12.670 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:12.762 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", - "2024-12-14 16:07:12.777 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.109013\n", - "2024-12-14 16:07:12.777 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T16:07:12.668203.json\n", - "2024-12-14 16:07:12.777 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 16:07:12.778 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:12.777977. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:12.778 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:07:12.778 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:12.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 16:07:12.802 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024486\n", - "2024-12-14 16:07:12.802 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T16:07:12.777977.json\n", - "2024-12-14 16:07:12.802 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", - "2024-12-14 16:07:12.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:12.803208. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:12.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:07:12.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:12.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", - "2024-12-14 16:07:13.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.248693\n", - "2024-12-14 16:07:13.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T16:07:12.803208.json\n", - "2024-12-14 16:07:13.052 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.411314\n", + "2024-12-14 17:15:37.749 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 17:15:37.758 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 17:15:37.759 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:37.759430. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:37.760 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:15:37.761 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:37.862 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 17:15:37.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.140464\n", + "2024-12-14 17:15:37.900 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T17:15:37.759430.json\n", + "2024-12-14 17:15:37.900 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 17:15:37.900 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:37.900755. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:37.901 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:15:37.901 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:37.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 17:15:37.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.078518\n", + "2024-12-14 17:15:37.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T17:15:37.900755.json\n", + "2024-12-14 17:15:37.979 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-14 17:15:37.980 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:37.980261. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:37.980 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:15:37.980 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:38.009 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-14 17:15:38.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.159248\n", + "2024-12-14 17:15:38.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T17:15:37.980261.json\n", + "2024-12-14 17:15:38.139 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.407567\n", "\u001b[0m\n" ] } @@ -3424,7 +3396,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 43, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3444,7 +3416,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (916_166, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
10012853null"GENDER//F"null
100128532084-01-01 00:00:00"MEDS_BIRTH"null
100128532175-04-05 00:00:00"Height (Inches)"null
100128532175-04-05 00:00:00"Weight (Lbs)"null
100128532175-04-05 00:00:00"BMI (kg/m2)"null
100398312116-01-09 11:00:00"LAB//50983//mEq/L"134.0
100398312116-01-09 11:00:00"LAB//50882//mEq/L"25.0
100398312116-01-09 11:00:00"LAB//50863//IU/L"112.0
100398312116-01-09 11:00:00"LAB//51237//UNK"3.3
100398312116-01-09 11:00:00"LAB//51274//sec"33.0
" + "shape: (916_166, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
10001725null"GENDER//F"null
100017252064-01-01 00:00:00"MEDS_BIRTH"null
100017252109-10-15 00:00:00"Weight (Lbs)"null
100017252109-10-15 00:00:00"Blood Pressure"null
100017252110-01-04 00:00:00"Weight (Lbs)"null
100397082145-01-04 11:47:00"LAB//50934//UNK"33.0
100397082145-01-04 11:47:00"LAB//50868//mEq/L"15.0
100397082145-01-04 12:24:00"LAB//51274//sec"13.4
100397082145-01-04 12:24:00"LAB//51237//UNK"1.2
100397082145-01-04 12:24:00"LAB//51275//sec"29.700001
" ], "text/plain": [ "shape: (916_166, 4)\n", @@ -3453,21 +3425,21 @@ "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ datetime[μs] ┆ str ┆ f32 │\n", "╞════════════╪═════════════════════╪═══════════════════╪═══════════════╡\n", - "│ 10012853 ┆ null ┆ GENDER//F ┆ null │\n", - "│ 10012853 ┆ 2084-01-01 00:00:00 ┆ MEDS_BIRTH ┆ null │\n", - "│ 10012853 ┆ 2175-04-05 00:00:00 ┆ Height (Inches) ┆ null │\n", - "│ 10012853 ┆ 2175-04-05 00:00:00 ┆ Weight (Lbs) ┆ null │\n", - "│ 10012853 ┆ 2175-04-05 00:00:00 ┆ BMI (kg/m2) ┆ null │\n", + "│ 10001725 ┆ null ┆ GENDER//F ┆ null │\n", + "│ 10001725 ┆ 2064-01-01 00:00:00 ┆ MEDS_BIRTH ┆ null │\n", + "│ 10001725 ┆ 2109-10-15 00:00:00 ┆ Weight (Lbs) ┆ null │\n", + "│ 10001725 ┆ 2109-10-15 00:00:00 ┆ Blood Pressure ┆ null │\n", + "│ 10001725 ┆ 2110-01-04 00:00:00 ┆ Weight (Lbs) ┆ null │\n", "│ … ┆ … ┆ … ┆ … │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//50983//mEq/L ┆ 134.0 │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//50882//mEq/L ┆ 25.0 │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//50863//IU/L ┆ 112.0 │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//51237//UNK ┆ 3.3 │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//51274//sec ┆ 33.0 │\n", + "│ 10039708 ┆ 2145-01-04 11:47:00 ┆ LAB//50934//UNK ┆ 33.0 │\n", + "│ 10039708 ┆ 2145-01-04 11:47:00 ┆ LAB//50868//mEq/L ┆ 15.0 │\n", + "│ 10039708 ┆ 2145-01-04 12:24:00 ┆ LAB//51274//sec ┆ 13.4 │\n", + "│ 10039708 ┆ 2145-01-04 12:24:00 ┆ LAB//51237//UNK ┆ 1.2 │\n", + "│ 10039708 ┆ 2145-01-04 12:24:00 ┆ LAB//51275//sec ┆ 29.700001 │\n", "└────────────┴─────────────────────┴───────────────────┴───────────────┘" ] }, - "execution_count": 21, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -3483,7 +3455,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 44, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3511,30 +3483,30 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (734, 2)
codecount
stru32
"DIAGNOSIS//ICD//10//E785"57
"DIAGNOSIS//ICD//10//E039"47
"DIAGNOSIS//ICD//10//Z794"37
"DIAGNOSIS//ICD//10//Z87891"35
"DIAGNOSIS//ICD//10//I2510"33
"DIAGNOSIS//ICD//10//M4856XA"1
"DIAGNOSIS//ICD//10//Z800"1
"DIAGNOSIS//ICD//10//M720"1
"DIAGNOSIS//ICD//10//Y92121"1
"DIAGNOSIS//ICD//10//T383X1A"1
" + "shape: (734, 2)
codecount
stru32
"DIAGNOSIS//ICD//10//E785"57
"DIAGNOSIS//ICD//10//E039"47
"DIAGNOSIS//ICD//10//Z794"37
"DIAGNOSIS//ICD//10//Z87891"35
"DIAGNOSIS//ICD//10//I2510"33
"DIAGNOSIS//ICD//10//Z792"1
"DIAGNOSIS//ICD//10//E041"1
"DIAGNOSIS//ICD//10//Z4502"1
"DIAGNOSIS//ICD//10//I2699"1
"DIAGNOSIS//ICD//10//J942"1
" ], "text/plain": [ "shape: (734, 2)\n", - "┌─────────────────────────────┬───────┐\n", - "│ code ┆ count │\n", - "│ --- ┆ --- │\n", - "│ str ┆ u32 │\n", - "╞═════════════════════════════╪═══════╡\n", - "│ DIAGNOSIS//ICD//10//E785 ┆ 57 │\n", - "│ DIAGNOSIS//ICD//10//E039 ┆ 47 │\n", - "│ DIAGNOSIS//ICD//10//Z794 ┆ 37 │\n", - "│ DIAGNOSIS//ICD//10//Z87891 ┆ 35 │\n", - "│ DIAGNOSIS//ICD//10//I2510 ┆ 33 │\n", - "│ … ┆ … │\n", - "│ DIAGNOSIS//ICD//10//M4856XA ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//Z800 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//M720 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//Y92121 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//T383X1A ┆ 1 │\n", - "└─────────────────────────────┴───────┘" + "┌────────────────────────────┬───────┐\n", + "│ code ┆ count │\n", + "│ --- ┆ --- │\n", + "│ str ┆ u32 │\n", + "╞════════════════════════════╪═══════╡\n", + "│ DIAGNOSIS//ICD//10//E785 ┆ 57 │\n", + "│ DIAGNOSIS//ICD//10//E039 ┆ 47 │\n", + "│ DIAGNOSIS//ICD//10//Z794 ┆ 37 │\n", + "│ DIAGNOSIS//ICD//10//Z87891 ┆ 35 │\n", + "│ DIAGNOSIS//ICD//10//I2510 ┆ 33 │\n", + "│ … ┆ … │\n", + "│ DIAGNOSIS//ICD//10//Z792 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//E041 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//Z4502 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//I2699 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//J942 ┆ 1 │\n", + "└────────────────────────────┴───────┘" ] }, - "execution_count": 22, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -3549,7 +3521,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 45, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3569,45 +3541,45 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (2_661, 6)
codedescriptionparent_codespossibly_cpt_codeitemidvalueuom
strstrlist[str]list[str]list[str]list[str]
"DIAGNOSIS//ICD//9//7916""Acetonuria"["ICD9CM/791.6"][null][null][null]
"INFUSION_START//229654""amiodarone Injection"["RxNorm/1663223"][null]["229654"][null]
"DIAGNOSIS//ICD//9//82021""Closed fracture of intertrocha…["ICD9CM/820.21"][null][null][null]
"DIAGNOSIS//ICD//10//S22068A""Other fracture of T7-T8 thorac…["ICD10CM/S22.068A"][null][null][null]
"DIAGNOSIS//ICD//9//9585""Traumatic anuria"["ICD9CM/958.5"][null][null][null]
"LAB//51307//UNK""CD13 cells/100 cells in Bone m…["LOINC/51237-6"][null]["51307"][null]
"PROCEDURE//ICD//9//3722""Left heart cardiac catheteriza…["ICD9Proc/37.22"][null][null][null]
"DIAGNOSIS//ICD//9//53560""Duodenitis, without mention of…["ICD9CM/535.60"][null][null][null]
"PROCEDURE//ICD//9//9671""Continuous invasive mechanical…["ICD9Proc/96.71"][null][null][null]
"DIAGNOSIS//ICD//10//Y92129""Unspecified place in nursing h…["ICD10CM/Y92.129"][null][null][null]
" + "shape: (2_661, 6)
codedescriptionparent_codesitemidpossibly_cpt_codevalueuom
strstrlist[str]list[str]list[str]list[str]
"DIAGNOSIS//ICD//10//E873""Alkalosis"["ICD10CM/E87.3"][null][null][null]
"DIAGNOSIS//ICD//10//Z85810""Personal history of malignant …["ICD10CM/Z85.810"][null][null][null]
"LAB//50883//mg/dL""Bilirubin.direct [Mass/volume]…["LOINC/1968-7"]["50883"][null]["mg/dL"]
"LAB//51000//mg/dL""Triglyceride [Mass/volume] in …["LOINC/2571-8"]["51000"][null]["mg/dL"]
"INFUSION_END//227531""mannitol Injection"["RxNorm/1791383"]["227531"][null][null]
"LAB//51501//#/hpf""Transitional cells [#/area] in…["LOINC/30089-7"]["51501"][null]["#/hpf"]
"DIAGNOSIS//ICD//9//V600""Lack of housing"["ICD9CM/V60.0"][null][null][null]
"PROCEDURE//START//225400""Bronchoscopy"["SNOMED/10847001"]["225400"][null][null]
"LAB//50991//ng/mL""Thyroglobulin [Mass/volume] in…["LOINC/3013-0"]["50991"][null]["ng/mL"]
"LAB//51296//N/A""Dacrocytes [Presence] in Blood…["LOINC/7791-7"]["51296"][null]["N/A"]
" ], "text/plain": [ "shape: (2_661, 6)\n", - "┌──────────────────┬──────────────────┬─────────────────┬─────────────────┬────────────┬───────────┐\n", - "│ code ┆ description ┆ parent_codes ┆ possibly_cpt_co ┆ itemid ┆ valueuom │\n", - "│ --- ┆ --- ┆ --- ┆ de ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ list[str] ┆ --- ┆ list[str] ┆ list[str] │\n", - "│ ┆ ┆ ┆ list[str] ┆ ┆ │\n", - "╞══════════════════╪══════════════════╪═════════════════╪═════════════════╪════════════╪═══════════╡\n", - "│ DIAGNOSIS//ICD// ┆ Acetonuria ┆ [\"ICD9CM/791.6\" ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//7916 ┆ ┆ ] ┆ ┆ ┆ │\n", - "│ INFUSION_START// ┆ amiodarone ┆ [\"RxNorm/166322 ┆ [null] ┆ [\"229654\"] ┆ [null] │\n", - "│ 229654 ┆ Injection ┆ 3\"] ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Closed fracture ┆ [\"ICD9CM/820.21 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//82021 ┆ of intertrocha… ┆ \"] ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Other fracture ┆ [\"ICD10CM/S22.0 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 10//S22068A ┆ of T7-T8 thorac… ┆ 68A\"] ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Traumatic anuria ┆ [\"ICD9CM/958.5\" ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//9585 ┆ ┆ ] ┆ ┆ ┆ │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ LAB//51307//UNK ┆ CD13 cells/100 ┆ [\"LOINC/51237-6 ┆ [null] ┆ [\"51307\"] ┆ [null] │\n", - "│ ┆ cells in Bone m… ┆ \"] ┆ ┆ ┆ │\n", - "│ PROCEDURE//ICD// ┆ Left heart ┆ [\"ICD9Proc/37.2 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//3722 ┆ cardiac ┆ 2\"] ┆ ┆ ┆ │\n", - "│ ┆ catheteriza… ┆ ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Duodenitis, ┆ [\"ICD9CM/535.60 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//53560 ┆ without mention ┆ \"] ┆ ┆ ┆ │\n", - "│ ┆ of… ┆ ┆ ┆ ┆ │\n", - "│ PROCEDURE//ICD// ┆ Continuous ┆ [\"ICD9Proc/96.7 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//9671 ┆ invasive ┆ 1\"] ┆ ┆ ┆ │\n", - "│ ┆ mechanical… ┆ ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Unspecified ┆ [\"ICD10CM/Y92.1 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 10//Y92129 ┆ place in nursing ┆ 29\"] ┆ ┆ ┆ │\n", - "│ ┆ h… ┆ ┆ ┆ ┆ │\n", - "└──────────────────┴──────────────────┴─────────────────┴─────────────────┴────────────┴───────────┘" + "┌──────────────────┬──────────────────┬─────────────────┬────────────┬─────────────────┬───────────┐\n", + "│ code ┆ description ┆ parent_codes ┆ itemid ┆ possibly_cpt_co ┆ valueuom │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ de ┆ --- │\n", + "│ str ┆ str ┆ list[str] ┆ list[str] ┆ --- ┆ list[str] │\n", + "│ ┆ ┆ ┆ ┆ list[str] ┆ │\n", + "╞══════════════════╪══════════════════╪═════════════════╪════════════╪═════════════════╪═══════════╡\n", + "│ DIAGNOSIS//ICD// ┆ Alkalosis ┆ [\"ICD10CM/E87.3 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//E873 ┆ ┆ \"] ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Personal history ┆ [\"ICD10CM/Z85.8 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//Z85810 ┆ of malignant … ┆ 10\"] ┆ ┆ ┆ │\n", + "│ LAB//50883//mg/d ┆ Bilirubin.direct ┆ [\"LOINC/1968-7\" ┆ [\"50883\"] ┆ [null] ┆ [\"mg/dL\"] │\n", + "│ L ┆ [Mass/volume]… ┆ ] ┆ ┆ ┆ │\n", + "│ LAB//51000//mg/d ┆ Triglyceride ┆ [\"LOINC/2571-8\" ┆ [\"51000\"] ┆ [null] ┆ [\"mg/dL\"] │\n", + "│ L ┆ [Mass/volume] in ┆ ] ┆ ┆ ┆ │\n", + "│ ┆ … ┆ ┆ ┆ ┆ │\n", + "│ INFUSION_END//22 ┆ mannitol ┆ [\"RxNorm/179138 ┆ [\"227531\"] ┆ [null] ┆ [null] │\n", + "│ 7531 ┆ Injection ┆ 3\"] ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ LAB//51501//#/hp ┆ Transitional ┆ [\"LOINC/30089-7 ┆ [\"51501\"] ┆ [null] ┆ [\"#/hpf\"] │\n", + "│ f ┆ cells [#/area] ┆ \"] ┆ ┆ ┆ │\n", + "│ ┆ in… ┆ ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Lack of housing ┆ [\"ICD9CM/V60.0\" ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 9//V600 ┆ ┆ ] ┆ ┆ ┆ │\n", + "│ PROCEDURE//START ┆ Bronchoscopy ┆ [\"SNOMED/108470 ┆ [\"225400\"] ┆ [null] ┆ [null] │\n", + "│ //225400 ┆ ┆ 01\"] ┆ ┆ ┆ │\n", + "│ LAB//50991//ng/m ┆ Thyroglobulin ┆ [\"LOINC/3013-0\" ┆ [\"50991\"] ┆ [null] ┆ [\"ng/mL\"] │\n", + "│ L ┆ [Mass/volume] ┆ ] ┆ ┆ ┆ │\n", + "│ ┆ in… ┆ ┆ ┆ ┆ │\n", + "│ LAB//51296//N/A ┆ Dacrocytes ┆ [\"LOINC/7791-7\" ┆ [\"51296\"] ┆ [null] ┆ [\"N/A\"] │\n", + "│ ┆ [Presence] in ┆ ] ┆ ┆ ┆ │\n", + "│ ┆ Blood… ┆ ┆ ┆ ┆ │\n", + "└──────────────────┴──────────────────┴─────────────────┴────────────┴─────────────────┴───────────┘" ] }, - "execution_count": 23, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -3619,7 +3591,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -3645,9 +3617,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//tmp'...\n", + "remote: Enumerating objects: 144, done.\u001b[K\n", + "remote: Counting objects: 100% (144/144), done.\u001b[K\n", + "remote: Compressing objects: 100% (129/129), done.\u001b[K\n", + "remote: Total 144 (delta 22), reused 70 (delta 7), pack-reused 0 (from 0)\u001b[K\n", + "Receiving objects: 100% (144/144), 211.41 KiB | 478.00 KiB/s, done.\n", + "Resolving deltas: 100% (22/22), done.\n" + ] + } + ], "source": [ "# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config\n", "!mkdir {ROOT_DIR}/meds-transform/\n", @@ -3661,7 +3647,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -3670,7 +3656,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3688,8 +3674,8 @@ "Additionally, consider reducing N_PARALLEL_WORKERS if > 1\n", "Skipping unzipping.\n", "Running pre-MEDS conversion.\n", - "\u001b[32m2024-12-14 16:18:06.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m278\u001b[0m - \u001b[1mLoading table preprocessors from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/table_preprocessors.yaml...\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for admissionDx:\n", + "\u001b[32m2024-12-14 17:20:07.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m278\u001b[0m - \u001b[1mLoading table preprocessors from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/table_preprocessors.yaml...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for admissionDx:\n", "offset_col: admitdxenteredoffset\n", "pseudotime_col: admitDxEnteredTimestamp\n", "output_data_cols:\n", @@ -3699,7 +3685,7 @@ "- How should we use `admitdxtest`?\n", "- How should we use `admitdxpath`?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for allergy:\n", + "\u001b[32m2024-12-14 17:20:07.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for allergy:\n", "offset_col: allergyenteredoffset\n", "pseudotime_col: allergyEnteredTimestamp\n", "output_data_cols:\n", @@ -3712,21 +3698,21 @@ "- Is `drugName` the name of the drug to which the patient is allergic or the drug\n", " given to the patient (docs say 'name of the selected admission drug')?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGeneral:\n", + "\u001b[32m2024-12-14 17:20:07.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGeneral:\n", "offset_col: cplitemoffset\n", "pseudotime_col: carePlanGeneralItemEnteredTimestamp\n", "output_data_cols:\n", "- cplgroup\n", "- cplitemvalue\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanEOL:\n", + "\u001b[32m2024-12-14 17:20:07.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanEOL:\n", "offset_col: cpleoldiscussionoffset\n", "pseudotime_col: carePlanEolDiscussionOccurredTimestamp\n", "warning_items:\n", "- Is the DiscussionOffset time actually reliable? Should we fall back on the SaveOffset\n", " time?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGoal:\n", + "\u001b[32m2024-12-14 17:20:07.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGoal:\n", "offset_col: cplgoaloffset\n", "pseudotime_col: carePlanGoalEnteredTimestamp\n", "output_data_cols:\n", @@ -3734,7 +3720,7 @@ "- cplgoalvalue\n", "- cplgoalstatus\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanInfectiousDisease:\n", + "\u001b[32m2024-12-14 17:20:07.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanInfectiousDisease:\n", "offset_col: cplinfectdiseaseoffset\n", "pseudotime_col: carePlanInfectDiseaseEnteredTimestamp\n", "output_data_cols:\n", @@ -3743,7 +3729,7 @@ "- responsetotherapy\n", "- treatment\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for diagnosis:\n", + "\u001b[32m2024-12-14 17:20:07.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for diagnosis:\n", "offset_col: diagnosisoffset\n", "pseudotime_col: diagnosisEnteredTimestamp\n", "output_data_cols:\n", @@ -3754,7 +3740,7 @@ "- Though we use it, the `diagnosisString` field documentation is unclear -- by what\n", " is it separated?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for infusionDrug:\n", + "\u001b[32m2024-12-14 17:20:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for infusionDrug:\n", "offset_col: infusionoffset\n", "pseudotime_col: infusionEnteredTimestamp\n", "output_data_cols:\n", @@ -3766,7 +3752,7 @@ "- volumeoffluid\n", "- patientweight\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for lab:\n", + "\u001b[32m2024-12-14 17:20:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for lab:\n", "offset_col: labresultoffset\n", "pseudotime_col: labResultDrawnTimestamp\n", "output_data_cols:\n", @@ -3780,7 +3766,7 @@ "- Is this the time the lab was drawn? Entered? The time the result came in?\n", "- We **IGNORE** the `labResultRevisedOffset` column -- this may be a mistake!\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for medication:\n", + "\u001b[32m2024-12-14 17:20:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for medication:\n", "offset_col:\n", "- drugorderoffset\n", "- drugstartoffset\n", @@ -3803,7 +3789,7 @@ "warning_items:\n", "- We **IGNORE** the `drugOrderCancelled` column -- this may be a mistake!\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseAssessment:\n", + "\u001b[32m2024-12-14 17:20:07.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseAssessment:\n", "offset_col:\n", "- nurseassessoffset\n", "- nurseassessentryoffset\n", @@ -3819,7 +3805,7 @@ "- Should we be using `cellAttributePath` instead of `cellAttribute`?\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCare:\n", + "\u001b[32m2024-12-14 17:20:07.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCare:\n", "offset_col:\n", "- nursecareoffset\n", "- nursecareentryoffset\n", @@ -3835,7 +3821,7 @@ "- Should we be using `cellAttributePath` instead of `cellAttribute`?\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCharting:\n", + "\u001b[32m2024-12-14 17:20:07.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCharting:\n", "offset_col:\n", "- nursingchartoffset\n", "- nursingchartentryoffset\n", @@ -3851,7 +3837,7 @@ "warning_items:\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for pastHistory:\n", + "\u001b[32m2024-12-14 17:20:07.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for pastHistory:\n", "offset_col:\n", "- pasthistoryoffset\n", "- pasthistoryenteredoffset\n", @@ -3869,7 +3855,7 @@ "- How should we use `pastHistoryPath` vs. `pastHistoryNoteType`?\n", "- How should we use `pastHistoryValue` vs. `pastHistoryValueText`?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for physicalExam:\n", + "\u001b[32m2024-12-14 17:20:07.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for physicalExam:\n", "offset_col: physicalexamoffset\n", "pseudotime_col: physicalExamEnteredTimestamp\n", "output_data_cols:\n", @@ -3881,7 +3867,7 @@ "- How should we use `physicalExamValue` vs. `physicalExamText`?\n", "- I believe the `physicalExamValue` is a **LIST**. This must be processed specially.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCare:\n", + "\u001b[32m2024-12-14 17:20:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCare:\n", "offset_col:\n", "- respcarestatusoffset\n", "- ventstartoffset\n", @@ -3924,7 +3910,7 @@ "- We might be able to use `priorVent` timestamps to further refine true season of\n", " unit admission.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCharting:\n", + "\u001b[32m2024-12-14 17:20:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCharting:\n", "offset_col:\n", "- respchartoffset\n", "- respchartentryoffset\n", @@ -3939,7 +3925,7 @@ "warning_items:\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for treatment:\n", + "\u001b[32m2024-12-14 17:20:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for treatment:\n", "offset_col: treatmentoffset\n", "pseudotime_col: treatmentEnteredTimestamp\n", "output_data_cols:\n", @@ -3948,7 +3934,7 @@ "warning_items:\n", "- Absence of entries in table do not indicate absence of treatments\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalAperiodic:\n", + "\u001b[32m2024-12-14 17:20:07.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalAperiodic:\n", "offset_col: observationoffset\n", "pseudotime_col: observationEnteredTimestamp\n", "output_data_cols:\n", @@ -3964,7 +3950,7 @@ "- pvr\n", "- pvri\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalPeriodic:\n", + "\u001b[32m2024-12-14 17:20:07.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalPeriodic:\n", "offset_col: observationoffset\n", "pseudotime_col: observationEnteredTimestamp\n", "output_data_cols:\n", @@ -3988,48 +3974,3141 @@ "warning_items:\n", "- These are 5-minute median values. There are going to be a *lot* of events.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mReloading processed patient df from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\u001b[0m\n", - "Done with diagnosis. Continuing\n", - "Done with vitalAperiodic. Continuing\n", - "Done with admissionDx. Continuing\n", - "Done with respiratoryCare. Continuing\n", - "Done with nurseAssessment. Continuing\n", - "Done with vitalPeriodic. Continuing\n", - "Done with carePlanGeneral. Continuing\n", - "Done with carePlanGoal. Continuing\n", - "Done with treatment. Continuing\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apacheApsVar as it is not supported in this pipeline.\u001b[0m\n", - "Done with carePlanEOL. Continuing\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m331\u001b[0m - \u001b[33m\u001b[1mNo function needed for infusiondrug. For eICU, THIS IS UNEXPECTED\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping carePlanCareProvider as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping microLab as it is not supported in this pipeline.\u001b[0m\n", - "Done with nurseCare. Continuing\n", - "Done with physicalExam. Continuing\n", - "Done with respiratoryCharting. Continuing\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping note as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping admissiondrug as it is not supported in this pipeline.\u001b[0m\n", - "Done with lab. Continuing\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePredVar as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping customLab as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePatientResult as it is not supported in this pipeline.\u001b[0m\n", - "Done with carePlanInfectiousDisease. Continuing\n", - "Done with allergy. Continuing\n", - "Done with nurseCharting. Continuing\n", - "Done with pastHistory. Continuing\n", - "Done with medication. Continuing\n", - "\u001b[32m2024-12-14 16:18:07.014\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping intakeOutput as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m352\u001b[0m - \u001b[1mDone! All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mProcessing patient table first...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m298\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/hospital.csv.gz...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m302\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/patient.csv.gz...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1mProcessing patient table...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m101\u001b[0m - \u001b[1mChecking that the 24h times are consistent. If this is extremely slow, consider refactoring to have only one `.collect()` call.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and hospitaldischargetime24...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and hospitaladmittime24...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and unitadmittime24...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and unitdischargetime24...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m109\u001b[0m - \u001b[1mValidated 24h times in 0:00:00.017329\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m111\u001b[0m - \u001b[33m\u001b[1mNOT validating the `unitVisitNumber` column as that isn't implemented yet.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m113\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING. Check with the eICU team:\n", + " - `apacheAdmissionDx` is not selected from the patients table as we grab it from `admissiondx`. Is this right?\n", + " - `admissionHeight` and `admissionWeight` are interpreted as **unit** admission height/weight, not hospital admission height/weight. Is this right?\n", + " - `age` is interpreted as the age at the time of the unit stay, not the hospital stay. Is this right?\n", + " - `What is the actual mean age for those > 89? Here we assume 90.\n", + " - Note that all the column names appear to be all in lowercase for the csv versions, vs. the docs\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing diagnosis...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/diagnosis.csv.gz in 0:00:00.021635\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.184\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for diagnosis table. Check with the eICU team:\n", + " - Though we use it, the `diagnosisString` field documentation is unclear -- by what is it separated?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet in 0:00:00.038430\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing vitalAperiodic...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/vitalAperiodic.csv.gz in 0:00:00.158846\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet in 0:00:00.228263\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing admissionDx...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/admissionDx.csv.gz in 0:00:00.007296\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.437\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for admissionDx table. Check with the eICU team:\n", + " - How should we use `admitdxtest`?\n", + " - How should we use `admitdxpath`?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet in 0:00:00.019188\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing respiratoryCare...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/respiratoryCare.csv.gz in 0:00:00.009363\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for respiratoryCare table. Check with the eICU team:\n", + " - We ignore the `priorVent*` columns -- this may be a mistake!\n", + " - There is a lot of data in this table -- what should be incorporated into the event structure?\n", + " - We might be able to use `priorVent` timestamps to further refine true season of unit admission.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet in 0:00:00.022688\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseAssessment...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseAssessment.csv.gz in 0:00:00.087465\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.560\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseAssessment table. Check with the eICU team:\n", + " - Should we be using `cellAttributePath` instead of `cellAttribute`?\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet in 0:00:00.120698\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing vitalPeriodic...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:08.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/vitalPeriodic.csv.gz in 0:00:01.244370\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:08.838\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for vitalPeriodic table. Check with the eICU team:\n", + " - These are 5-minute median values. There are going to be a *lot* of events.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet in 0:00:01.629149\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanGeneral...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanGeneral.csv.gz in 0:00:00.021348\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet in 0:00:00.037551\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanGoal...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanGoal.csv.gz in 0:00:00.003226\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet in 0:00:00.012718\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing treatment...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/treatment.csv.gz in 0:00:00.023153\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.296\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for treatment table. Check with the eICU team:\n", + " - Absence of entries in table do not indicate absence of treatments\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet in 0:00:00.046999\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.320\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apacheApsVar as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanEOL...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanEOL.csv.gz in 0:00:00.000273\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.321\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for carePlanEOL table. Check with the eICU team:\n", + " - Is the DiscussionOffset time actually reliable? Should we fall back on the SaveOffset time?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet in 0:00:00.008632\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m331\u001b[0m - \u001b[33m\u001b[1mNo function needed for infusiondrug. For eICU, THIS IS UNEXPECTED\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping carePlanCareProvider as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping microLab as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseCare...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseCare.csv.gz in 0:00:00.038466\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.368\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseCare table. Check with the eICU team:\n", + " - Should we be using `cellAttributePath` instead of `cellAttribute`?\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet in 0:00:00.057232\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing physicalExam...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/physicalExam.csv.gz in 0:00:00.062587\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.450\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for physicalExam table. Check with the eICU team:\n", + " - How should we use `physicalExamValue` vs. `physicalExamText`?\n", + " - I believe the `physicalExamValue` is a **LIST**. This must be processed specially.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet in 0:00:00.096029\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing respiratoryCharting...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/respiratoryCharting.csv.gz in 0:00:00.116960\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.600\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for respiratoryCharting table. Check with the eICU team:\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet in 0:00:00.173772\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping note as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping admissiondrug as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing lab...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/lab.csv.gz in 0:00:00.387661\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.045\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for lab table. Check with the eICU team:\n", + " - Is this the time the lab was drawn? Entered? The time the result came in?\n", + " - We **IGNORE** the `labResultRevisedOffset` column -- this may be a mistake!\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet in 0:00:00.485708\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePredVar as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping customLab as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePatientResult as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanInfectiousDisease...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanInfectiousDisease.csv.gz in 0:00:00.000544\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet in 0:00:00.009958\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing allergy...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/allergy.csv.gz in 0:00:00.003649\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.157\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for allergy table. Check with the eICU team:\n", + " - How should we use `allergyNoteType`?\n", + " - How should we use `specialtyType`?\n", + " - How should we use `userType`?\n", + " - Is `drugName` the name of the drug to which the patient is allergic or the drug given to the patient (docs say 'name of the selected admission drug')?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet in 0:00:00.011618\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseCharting...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseCharting.csv.gz in 0:00:01.167813\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.334\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseCharting table. Check with the eICU team:\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet in 0:00:01.540825\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.707\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing pastHistory...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/pastHistory.csv.gz in 0:00:00.012306\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.719\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for pastHistory table. Check with the eICU team:\n", + " - SOME MAY BE LISTS\n", + " - How should we use `pastHistoryPath` vs. `pastHistoryNoteType`?\n", + " - How should we use `pastHistoryValue` vs. `pastHistoryValueText`?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet in 0:00:00.027972\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing medication...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/medication.csv.gz in 0:00:00.124266\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.859\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for medication table. Check with the eICU team:\n", + " - We **IGNORE** the `drugOrderCancelled` column -- this may be a mistake!\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet in 0:00:00.168091\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.903\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping intakeOutput as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m352\u001b[0m - \u001b[1mDone! All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds\u001b[0m\n", "Setting N_WORKERS to 1 to avoid issues with the runners.\n", "Running extraction pipeline.\n", - "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mSkipping stage shard_events as it is already complete.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mSkipping stage split_and_shard_subjects as it is already complete.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mSkipping stage convert_to_sharded_events as it is already complete.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:20:12.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:12.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:14.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:14.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:20:13.013 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", + "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 10000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: null\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: shard_events\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: eICU\n", + " dataset_version: 2.0\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the eICU dataset in longitudinal, sparse form\\\n", + " \\ from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export EICU_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "Stage: shard_events\n", + "\n", + "Stage config:\n", + "row_chunksize: 200000000\n", + "infer_schema_length: 999999999\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//pre_meds/\n", + "is_metadata: false\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + "reducer_output_dir: null\n", + "\n", + "2024-12-14 17:20:13.017 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml to identify needed columns.\n", + "2024-12-14 17:20:13.069 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. Sub-sharding 20 files:\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet\n", + "2024-12-14 17:20:13.071 | INFO | MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", + "2024-12-14 17:20:13.073 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL.\n", + "2024-12-14 17:20:13.073 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet to determine row count.\n", + "2024-12-14 17:20:13.075 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.076 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.076 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanEolDiscussionOccurredTimestamp', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.082 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanEolDiscussionOccurredTimestamp, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.088 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet.\n", + "2024-12-14 17:20:13.088 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.088 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet row-chunk [0-15) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet.\n", + "2024-12-14 17:20:13.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.091462. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet\n", + "2024-12-14 17:20:13.093 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.093 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.093 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanEolDiscussionOccurredTimestamp', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.093 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanEolDiscussionOccurredTimestamp, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.094 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:13.101 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010042\n", + "2024-12-14 17:20:13.101 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:13.091462.json\n", + "2024-12-14 17:20:13.104 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting.\n", + "2024-12-14 17:20:13.104 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet to determine row count.\n", + "2024-12-14 17:20:13.106 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.106 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.106 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['nursingChartEnteredTimestamp', 'nursingChartPerformedTimestamp', 'nursingchartcelltypecat', 'nursingchartcelltypevallabel', 'nursingchartcelltypevalname', 'nursingchartid', 'nursingchartvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.107 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, nursingChartEnteredTimestamp, nursingChartPerformedTimestamp, nursingchartcelltypecat, nursingchartcelltypevallabel, nursingchartcelltypevalname, nursingchartid, nursingchartvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.118 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1477163 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet.\n", + "2024-12-14 17:20:13.118 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.118 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet row-chunk [0-1477163) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet.\n", + "2024-12-14 17:20:13.120 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.120700. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet\n", + "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['nursingChartEnteredTimestamp', 'nursingChartPerformedTimestamp', 'nursingchartcelltypecat', 'nursingchartcelltypevallabel', 'nursingchartcelltypevalname', 'nursingchartid', 'nursingchartvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, nursingChartEnteredTimestamp, nursingChartPerformedTimestamp, nursingchartcelltypecat, nursingchartcelltypevallabel, nursingchartcelltypevalname, nursingchartid, nursingchartvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:13.501 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.380454\n", + "2024-12-14 17:20:13.501 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:13.120700.json\n", + "2024-12-14 17:20:13.504 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication.\n", + "2024-12-14 17:20:13.504 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet to determine row count.\n", + "2024-12-14 17:20:13.506 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.507 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.507 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dosage', 'drugivadmixture', 'drugname', 'drugordertimestamp', 'drugstarttimestamp', 'drugstoptimestamp', 'frequency', 'gtc', 'loadingdose', 'medicationid', 'patienthealthsystemstayid', 'prn', 'routeadmin']\n", + "2024-12-14 17:20:13.507 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dosage, drugivadmixture, drugname, drugordertimestamp, drugstarttimestamp, drugstoptimestamp, frequency, gtc, loadingdose, medicationid, patienthealthsystemstayid, prn, routeadmin\n", + "2024-12-14 17:20:13.508 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 75604 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet.\n", + "2024-12-14 17:20:13.508 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.508 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet row-chunk [0-75604) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet.\n", + "2024-12-14 17:20:13.510 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.510654. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet\n", + "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dosage', 'drugivadmixture', 'drugname', 'drugordertimestamp', 'drugstarttimestamp', 'drugstoptimestamp', 'frequency', 'gtc', 'loadingdose', 'medicationid', 'patienthealthsystemstayid', 'prn', 'routeadmin']\n", + "2024-12-14 17:20:13.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dosage, drugivadmixture, drugname, drugordertimestamp, drugstarttimestamp, drugstoptimestamp, frequency, gtc, loadingdose, medicationid, patienthealthsystemstayid, prn, routeadmin\n", + "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:13.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.037873\n", + "2024-12-14 17:20:13.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:13.510654.json\n", + "2024-12-14 17:20:13.551 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal.\n", + "2024-12-14 17:20:13.551 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet to determine row count.\n", + "2024-12-14 17:20:13.553 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.553 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.553 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGoalEnteredTimestamp', 'cplgoalcategory', 'cplgoalstatus', 'cplgoalvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.553 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGoalEnteredTimestamp, cplgoalcategory, cplgoalstatus, cplgoalvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.554 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 3633 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet.\n", + "2024-12-14 17:20:13.554 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.554 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet row-chunk [0-3633) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet.\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.557013. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.557 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.557 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGoalEnteredTimestamp', 'cplgoalcategory', 'cplgoalstatus', 'cplgoalvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.557 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGoalEnteredTimestamp, cplgoalcategory, cplgoalstatus, cplgoalvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:13.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003061\n", + "2024-12-14 17:20:13.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:13.557013.json\n", + "2024-12-14 17:20:13.562 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease.\n", + "2024-12-14 17:20:13.562 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet to determine row count.\n", + "2024-12-14 17:20:13.564 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.564 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.564 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanInfectDiseaseEnteredTimestamp', 'infectdiseaseassessment', 'infectdiseasesite', 'patienthealthsystemstayid', 'responsetotherapy', 'treatment']\n", + "2024-12-14 17:20:13.564 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanInfectDiseaseEnteredTimestamp, infectdiseaseassessment, infectdiseasesite, patienthealthsystemstayid, responsetotherapy, treatment\n", + "2024-12-14 17:20:13.565 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 112 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet.\n", + "2024-12-14 17:20:13.565 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.565 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet row-chunk [0-112) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet.\n", + "2024-12-14 17:20:13.567 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.567600. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.567 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet\n", + "2024-12-14 17:20:13.567 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.567 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.568 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanInfectDiseaseEnteredTimestamp', 'infectdiseaseassessment', 'infectdiseasesite', 'patienthealthsystemstayid', 'responsetotherapy', 'treatment']\n", + "2024-12-14 17:20:13.568 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanInfectDiseaseEnteredTimestamp, infectdiseaseassessment, infectdiseasesite, patienthealthsystemstayid, responsetotherapy, treatment\n", + "2024-12-14 17:20:13.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:13.569 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.001941\n", + "2024-12-14 17:20:13.569 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:13.567600.json\n", + "2024-12-14 17:20:13.571 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic.\n", + "2024-12-14 17:20:13.571 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet to determine row count.\n", + "2024-12-14 17:20:13.574 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.574 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.574 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cvp', 'etco2', 'heartrate', 'icp', 'observationEnteredTimestamp', 'padiastolic', 'pamean', 'pasystolic', 'patienthealthsystemstayid', 'respiration', 'sao2', 'st1', 'st2', 'st3', 'systemicdiastolic', 'systemicmean', 'systemicsystolic', 'temperature', 'vitalperiodicid']\n", + "2024-12-14 17:20:13.574 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cvp, etco2, heartrate, icp, observationEnteredTimestamp, padiastolic, pamean, pasystolic, patienthealthsystemstayid, respiration, sao2, st1, st2, st3, systemicdiastolic, systemicmean, systemicsystolic, temperature, vitalperiodicid\n", + "2024-12-14 17:20:13.576 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1634960 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet.\n", + "2024-12-14 17:20:13.576 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.576 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet row-chunk [0-1634960) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet.\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.578964. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.579 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.579 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cvp', 'etco2', 'heartrate', 'icp', 'observationEnteredTimestamp', 'padiastolic', 'pamean', 'pasystolic', 'patienthealthsystemstayid', 'respiration', 'sao2', 'st1', 'st2', 'st3', 'systemicdiastolic', 'systemicmean', 'systemicsystolic', 'temperature', 'vitalperiodicid']\n", + "2024-12-14 17:20:13.579 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cvp, etco2, heartrate, icp, observationEnteredTimestamp, padiastolic, pamean, pasystolic, patienthealthsystemstayid, respiration, sao2, st1, st2, st3, systemicdiastolic, systemicmean, systemicsystolic, temperature, vitalperiodicid\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:13.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.363615\n", + "2024-12-14 17:20:13.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:13.578964.json\n", + "2024-12-14 17:20:13.945 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient.\n", + "2024-12-14 17:20:13.945 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet to determine row count.\n", + "2024-12-14 17:20:13.947 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dateofbirth', 'ethnicity', 'gender', 'hospitaladmitsource', 'hospitaladmittimestamp', 'hospitaldischargelocation', 'hospitaldischargestatus', 'hospitaldischargetimestamp', 'hospitalid', 'hospitalnumbedscategory', 'hospitalregion', 'hospitalteachingstatus', 'patienthealthsystemstayid', 'patientunitstayid', 'uniquepid', 'unitadmissionheight', 'unitadmissionweight', 'unitadmitsource', 'unitadmittimestamp', 'unitdischargelocation', 'unitdischargestatus', 'unitdischargetimestamp', 'unitdischargeweight', 'unitstaytype', 'wardid']\n", + "2024-12-14 17:20:13.948 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dateofbirth, ethnicity, gender, hospitaladmitsource, hospitaladmittimestamp, hospitaldischargelocation, hospitaldischargestatus, hospitaldischargetimestamp, hospitalid, hospitalnumbedscategory, hospitalregion, hospitalteachingstatus, patienthealthsystemstayid, patientunitstayid, uniquepid, unitadmissionheight, unitadmissionweight, unitadmitsource, unitadmittimestamp, unitdischargelocation, unitdischargestatus, unitdischargetimestamp, unitdischargeweight, unitstaytype, wardid\n", + "2024-12-14 17:20:13.948 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2520 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet.\n", + "2024-12-14 17:20:13.948 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.948 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet row-chunk [0-2520) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet.\n", + "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.950962. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\n", + "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dateofbirth', 'ethnicity', 'gender', 'hospitaladmitsource', 'hospitaladmittimestamp', 'hospitaldischargelocation', 'hospitaldischargestatus', 'hospitaldischargetimestamp', 'hospitalid', 'hospitalnumbedscategory', 'hospitalregion', 'hospitalteachingstatus', 'patienthealthsystemstayid', 'patientunitstayid', 'uniquepid', 'unitadmissionheight', 'unitadmissionweight', 'unitadmitsource', 'unitadmittimestamp', 'unitdischargelocation', 'unitdischargestatus', 'unitdischargetimestamp', 'unitdischargeweight', 'unitstaytype', 'wardid']\n", + "2024-12-14 17:20:13.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dateofbirth, ethnicity, gender, hospitaladmitsource, hospitaladmittimestamp, hospitaldischargelocation, hospitaldischargestatus, hospitaldischargetimestamp, hospitalid, hospitalnumbedscategory, hospitalregion, hospitalteachingstatus, patienthealthsystemstayid, patientunitstayid, uniquepid, unitadmissionheight, unitadmissionweight, unitadmitsource, unitadmittimestamp, unitdischargelocation, unitdischargestatus, unitdischargetimestamp, unitdischargeweight, unitstaytype, wardid\n", + "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:13.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010891\n", + "2024-12-14 17:20:13.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:13.950962.json\n", + "2024-12-14 17:20:13.964 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab.\n", + "2024-12-14 17:20:13.964 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet to determine row count.\n", + "2024-12-14 17:20:13.966 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['labResultDrawnTimestamp', 'labmeasurenameinterface', 'labmeasurenamesystem', 'labname', 'labresult', 'labresulttext', 'labtypeid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, labResultDrawnTimestamp, labmeasurenameinterface, labmeasurenamesystem, labname, labresult, labresulttext, labtypeid, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.969 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 434660 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet.\n", + "2024-12-14 17:20:13.969 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.969 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet row-chunk [0-434660) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet.\n", + "2024-12-14 17:20:13.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.972482. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet\n", + "2024-12-14 17:20:13.972 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['labResultDrawnTimestamp', 'labmeasurenameinterface', 'labmeasurenamesystem', 'labname', 'labresult', 'labresulttext', 'labtypeid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.973 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, labResultDrawnTimestamp, labmeasurenameinterface, labmeasurenamesystem, labname, labresult, labresulttext, labtypeid, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.973 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.973 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:14.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.085536\n", + "2024-12-14 17:20:14.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:13.972482.json\n", + "2024-12-14 17:20:14.061 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment.\n", + "2024-12-14 17:20:14.061 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet to determine row count.\n", + "2024-12-14 17:20:14.063 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.063 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.063 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'treatmentEnteredTimestamp', 'treatmentid', 'treatmentstring']\n", + "2024-12-14 17:20:14.063 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, treatmentEnteredTimestamp, treatmentid, treatmentstring\n", + "2024-12-14 17:20:14.064 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 38290 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet.\n", + "2024-12-14 17:20:14.064 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.064 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet row-chunk [0-38290) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet.\n", + "2024-12-14 17:20:14.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.066637. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet\n", + "2024-12-14 17:20:14.067 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.067 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.067 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'treatmentEnteredTimestamp', 'treatmentid', 'treatmentstring']\n", + "2024-12-14 17:20:14.067 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, treatmentEnteredTimestamp, treatmentid, treatmentstring\n", + "2024-12-14 17:20:14.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:14.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009360\n", + "2024-12-14 17:20:14.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:14.066637.json\n", + "2024-12-14 17:20:14.078 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment.\n", + "2024-12-14 17:20:14.079 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet to determine row count.\n", + "2024-12-14 17:20:14.080 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.081 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.081 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseAssessEnteredTimestamp', 'nurseAssessPerformedTimestamp', 'nurseassessid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.081 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseAssessEnteredTimestamp, nurseAssessPerformedTimestamp, nurseassessid, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.081 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 91589 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet.\n", + "2024-12-14 17:20:14.081 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.081 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet row-chunk [0-91589) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet.\n", + "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.084291. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet\n", + "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.084 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.084 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseAssessEnteredTimestamp', 'nurseAssessPerformedTimestamp', 'nurseassessid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.084 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseAssessEnteredTimestamp, nurseAssessPerformedTimestamp, nurseassessid, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.085 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:14.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.021893\n", + "2024-12-14 17:20:14.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:14.084291.json\n", + "2024-12-14 17:20:14.108 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam.\n", + "2024-12-14 17:20:14.108 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet to determine row count.\n", + "2024-12-14 17:20:14.110 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.110 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.111 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'physicalExamEnteredTimestamp', 'physicalexamid', 'physicalexampath', 'physicalexamtext', 'physicalexamvalue']\n", + "2024-12-14 17:20:14.111 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, physicalExamEnteredTimestamp, physicalexamid, physicalexampath, physicalexamtext, physicalexamvalue\n", + "2024-12-14 17:20:14.111 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 84058 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet.\n", + "2024-12-14 17:20:14.111 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.111 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet row-chunk [0-84058) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet.\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.113975. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.114 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.114 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'physicalExamEnteredTimestamp', 'physicalexamid', 'physicalexampath', 'physicalexamtext', 'physicalexamvalue']\n", + "2024-12-14 17:20:14.114 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, physicalExamEnteredTimestamp, physicalexamid, physicalexampath, physicalexamtext, physicalexamvalue\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:14.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025145\n", + "2024-12-14 17:20:14.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/.[0-84058).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:14.113975.json\n", + "2024-12-14 17:20:14.141 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx.\n", + "2024-12-14 17:20:14.141 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet to determine row count.\n", + "2024-12-14 17:20:14.143 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.144 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.144 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admissiondxid', 'admitDxEnteredTimestamp', 'admitdxname', 'patienthealthsystemstayid', 'patientunitstayid']\n", + "2024-12-14 17:20:14.144 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admissiondxid, admitDxEnteredTimestamp, admitdxname, patienthealthsystemstayid, patientunitstayid\n", + "2024-12-14 17:20:14.144 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 7578 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet.\n", + "2024-12-14 17:20:14.144 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.144 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet row-chunk [0-7578) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet.\n", + "2024-12-14 17:20:14.146 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.146860. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet\n", + "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.147 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.147 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admissiondxid', 'admitDxEnteredTimestamp', 'admitdxname', 'patienthealthsystemstayid', 'patientunitstayid']\n", + "2024-12-14 17:20:14.147 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admissiondxid, admitDxEnteredTimestamp, admitdxname, patienthealthsystemstayid, patientunitstayid\n", + "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:14.150 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003467\n", + "2024-12-14 17:20:14.150 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/.[0-7578).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:14.146860.json\n", + "2024-12-14 17:20:14.152 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis.\n", + "2024-12-14 17:20:14.152 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet to determine row count.\n", + "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['diagnosisEnteredTimestamp', 'diagnosispriority', 'diagnosisstring', 'icd9code', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, diagnosisEnteredTimestamp, diagnosispriority, diagnosisstring, icd9code, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 24978 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet.\n", + "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet row-chunk [0-24978) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet.\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.158055. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.158 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.158 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['diagnosisEnteredTimestamp', 'diagnosispriority', 'diagnosisstring', 'icd9code', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.158 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, diagnosisEnteredTimestamp, diagnosispriority, diagnosisstring, icd9code, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:14.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006441\n", + "2024-12-14 17:20:14.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:14.158055.json\n", + "2024-12-14 17:20:14.166 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting.\n", + "2024-12-14 17:20:14.167 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet to determine row count.\n", + "2024-12-14 17:20:14.169 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.169 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.169 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'respChartEnteredTimestamp', 'respChartPerformedTimestamp', 'respchartid', 'respcharttypecat', 'respchartvalue', 'respchartvaluelabel']\n", + "2024-12-14 17:20:14.169 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, respChartEnteredTimestamp, respChartPerformedTimestamp, respchartid, respcharttypecat, respchartvalue, respchartvaluelabel\n", + "2024-12-14 17:20:14.170 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 176089 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet.\n", + "2024-12-14 17:20:14.170 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.170 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet row-chunk [0-176089) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet.\n", + "2024-12-14 17:20:14.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.172792. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet\n", + "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.173 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.173 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'respChartEnteredTimestamp', 'respChartPerformedTimestamp', 'respchartid', 'respcharttypecat', 'respchartvalue', 'respchartvaluelabel']\n", + "2024-12-14 17:20:14.173 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, respChartEnteredTimestamp, respChartPerformedTimestamp, respchartid, respcharttypecat, respchartvalue, respchartvaluelabel\n", + "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:14.217 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.044580\n", + "2024-12-14 17:20:14.217 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:14.172792.json\n", + "2024-12-14 17:20:14.220 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare.\n", + "2024-12-14 17:20:14.220 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet to determine row count.\n", + "2024-12-14 17:20:14.222 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.222 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.222 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseCareEnteredTimestamp', 'nurseCarePerformedTimestamp', 'nursecareid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.222 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseCareEnteredTimestamp, nurseCarePerformedTimestamp, nursecareid, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.223 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 42080 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet.\n", + "2024-12-14 17:20:14.223 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.223 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet row-chunk [0-42080) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet.\n", + "2024-12-14 17:20:14.225 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.225610. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.225 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet\n", + "2024-12-14 17:20:14.225 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.226 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.226 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseCareEnteredTimestamp', 'nurseCarePerformedTimestamp', 'nursecareid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.226 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseCareEnteredTimestamp, nurseCarePerformedTimestamp, nursecareid, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.226 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.226 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:14.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012115\n", + "2024-12-14 17:20:14.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:14.225610.json\n", + "2024-12-14 17:20:14.240 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral.\n", + "2024-12-14 17:20:14.240 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet to determine row count.\n", + "2024-12-14 17:20:14.242 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.242 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.242 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGeneralItemEnteredTimestamp', 'cplgroup', 'cplitemvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.242 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGeneralItemEnteredTimestamp, cplgroup, cplitemvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.243 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 33148 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet.\n", + "2024-12-14 17:20:14.243 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.243 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet row-chunk [0-33148) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet.\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.245975. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGeneralItemEnteredTimestamp', 'cplgroup', 'cplitemvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGeneralItemEnteredTimestamp, cplgroup, cplitemvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:14.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006840\n", + "2024-12-14 17:20:14.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:14.245975.json\n", + "2024-12-14 17:20:14.255 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory.\n", + "2024-12-14 17:20:14.255 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet to determine row count.\n", + "2024-12-14 17:20:14.257 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['pastHistoryEnteredTimestamp', 'pastHistoryTakenTimestamp', 'pasthistoryid', 'pasthistorynotetype', 'pasthistorypath', 'pasthistoryvalue', 'pasthistoryvaluetext', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.258 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, pastHistoryEnteredTimestamp, pastHistoryTakenTimestamp, pasthistoryid, pasthistorynotetype, pasthistorypath, pasthistoryvalue, pasthistoryvaluetext, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.258 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 12109 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet.\n", + "2024-12-14 17:20:14.258 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.258 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet row-chunk [0-12109) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet.\n", + "2024-12-14 17:20:14.260 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.260683. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.260 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet\n", + "2024-12-14 17:20:14.261 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.261 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.261 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['pastHistoryEnteredTimestamp', 'pastHistoryTakenTimestamp', 'pasthistoryid', 'pasthistorynotetype', 'pasthistorypath', 'pasthistoryvalue', 'pasthistoryvaluetext', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.261 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, pastHistoryEnteredTimestamp, pastHistoryTakenTimestamp, pasthistoryid, pasthistorynotetype, pasthistorypath, pasthistoryvalue, pasthistoryvaluetext, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.261 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.261 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:14.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007372\n", + "2024-12-14 17:20:14.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:14.260683.json\n", + "2024-12-14 17:20:14.270 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic.\n", + "2024-12-14 17:20:14.270 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet to determine row count.\n", + "2024-12-14 17:20:14.273 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.273 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.273 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cardiacinput', 'cardiacoutput', 'noninvasivediastolic', 'noninvasivemean', 'noninvasivesystolic', 'observationEnteredTimestamp', 'paop', 'patienthealthsystemstayid', 'pvr', 'pvri', 'svr', 'svri', 'vitalaperiodicid']\n", + "2024-12-14 17:20:14.273 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cardiacinput, cardiacoutput, noninvasivediastolic, noninvasivemean, noninvasivesystolic, observationEnteredTimestamp, paop, patienthealthsystemstayid, pvr, pvri, svr, svri, vitalaperiodicid\n", + "2024-12-14 17:20:14.274 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 274088 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet.\n", + "2024-12-14 17:20:14.274 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.274 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet row-chunk [0-274088) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet.\n", + "2024-12-14 17:20:14.276 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.276629. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.276 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet\n", + "2024-12-14 17:20:14.276 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.277 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.277 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cardiacinput', 'cardiacoutput', 'noninvasivediastolic', 'noninvasivemean', 'noninvasivesystolic', 'observationEnteredTimestamp', 'paop', 'patienthealthsystemstayid', 'pvr', 'pvri', 'svr', 'svri', 'vitalaperiodicid']\n", + "2024-12-14 17:20:14.277 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cardiacinput, cardiacoutput, noninvasivediastolic, noninvasivemean, noninvasivesystolic, observationEnteredTimestamp, paop, patienthealthsystemstayid, pvr, pvri, svr, svri, vitalaperiodicid\n", + "2024-12-14 17:20:14.277 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.277 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:14.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.046469\n", + "2024-12-14 17:20:14.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:14.276629.json\n", + "2024-12-14 17:20:14.326 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy.\n", + "2024-12-14 17:20:14.326 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet to determine row count.\n", + "2024-12-14 17:20:14.328 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.328 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.328 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['allergyEnteredTimestamp', 'allergyname', 'allergytype', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.328 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, allergyEnteredTimestamp, allergyname, allergytype, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.329 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2475 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet.\n", + "2024-12-14 17:20:14.329 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.329 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet row-chunk [0-2475) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet.\n", + "2024-12-14 17:20:14.331 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.331616. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.331 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet\n", + "2024-12-14 17:20:14.331 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.332 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.332 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['allergyEnteredTimestamp', 'allergyname', 'allergytype', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.332 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, allergyEnteredTimestamp, allergyname, allergytype, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:14.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002488\n", + "2024-12-14 17:20:14.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:14.331616.json\n", + "2024-12-14 17:20:14.336 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare.\n", + "2024-12-14 17:20:14.336 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet to determine row count.\n", + "2024-12-14 17:20:14.338 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.338 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.338 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['airwayposition', 'airwaysize', 'airwaytype', 'cpaplimit', 'cuffpressure', 'hiexhmvlimit', 'highironoxlimit', 'hipeakpreslimit', 'hirespratelimit', 'lowexhmvlimit', 'lowexhtvlimit', 'lowironoxlimit', 'lowpeakpreslimit', 'lowrespratelimit', 'meanairwaypreslimit', 'patienthealthsystemstayid', 'peeplimit', 'respCareStatusEnteredTimestamp', 'respcareid', 'setapneafio2', 'setapneaie', 'setapneainsptime', 'setapneainterval', 'setapneaippeephigh', 'setapneapeakflow', 'setapnearr', 'setapneatv', 'sighpreslimit', 'ventEndTimestamp', 'ventStartTimestamp']\n", + "2024-12-14 17:20:14.339 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, airwayposition, airwaysize, airwaytype, cpaplimit, cuffpressure, hiexhmvlimit, highironoxlimit, hipeakpreslimit, hirespratelimit, lowexhmvlimit, lowexhtvlimit, lowironoxlimit, lowpeakpreslimit, lowrespratelimit, meanairwaypreslimit, patienthealthsystemstayid, peeplimit, respCareStatusEnteredTimestamp, respcareid, setapneafio2, setapneaie, setapneainsptime, setapneainterval, setapneaippeephigh, setapneapeakflow, setapnearr, setapneatv, sighpreslimit, ventEndTimestamp, ventStartTimestamp\n", + "2024-12-14 17:20:14.339 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 5436 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet.\n", + "2024-12-14 17:20:14.339 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.339 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet row-chunk [0-5436) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet.\n", + "2024-12-14 17:20:14.341 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.341778. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet\n", + "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['airwayposition', 'airwaysize', 'airwaytype', 'cpaplimit', 'cuffpressure', 'hiexhmvlimit', 'highironoxlimit', 'hipeakpreslimit', 'hirespratelimit', 'lowexhmvlimit', 'lowexhtvlimit', 'lowironoxlimit', 'lowpeakpreslimit', 'lowrespratelimit', 'meanairwaypreslimit', 'patienthealthsystemstayid', 'peeplimit', 'respCareStatusEnteredTimestamp', 'respcareid', 'setapneafio2', 'setapneaie', 'setapneainsptime', 'setapneainterval', 'setapneaippeephigh', 'setapneapeakflow', 'setapnearr', 'setapneatv', 'sighpreslimit', 'ventEndTimestamp', 'ventStartTimestamp']\n", + "2024-12-14 17:20:14.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, airwayposition, airwaysize, airwaytype, cpaplimit, cuffpressure, hiexhmvlimit, highironoxlimit, hipeakpreslimit, hirespratelimit, lowexhmvlimit, lowexhtvlimit, lowironoxlimit, lowpeakpreslimit, lowrespratelimit, meanairwaypreslimit, patienthealthsystemstayid, peeplimit, respCareStatusEnteredTimestamp, respcareid, setapneafio2, setapneaie, setapneainsptime, setapneainterval, setapneaippeephigh, setapneapeakflow, setapnearr, setapneatv, sighpreslimit, ventEndTimestamp, ventStartTimestamp\n", + "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:14.357 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016024\n", + "2024-12-14 17:20:14.357 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:14.341778.json\n", + "2024-12-14 17:20:14.358 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:01.286791\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:14.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:14.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:15.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:15.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:20:14.978 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", + "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 10000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: null\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: split_and_shard_subjects\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: eICU\n", + " dataset_version: 2.0\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the eICU dataset in longitudinal, sparse form\\\n", + " \\ from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export EICU_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 17:20:14.991 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "is_metadata: true\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//meds//metadata\n", + "n_subjects_per_shard: 10000\n", + "external_splits_json_fp: null\n", + "split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", + "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "train_only: true\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", + "2024-12-14 17:20:14.992 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml (needed for subject ID columns)\n", + "2024-12-14 17:20:15.041 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", + "subject_id_col: patienthealthsystemstayid\n", + "patient:\n", + " dob:\n", + " code: MEDS_BIRTH\n", + " time: col(dateofbirth)\n", + " uniquepid: uniquepid\n", + " gender:\n", + " code:\n", + " - GENDER\n", + " - col(gender)\n", + " time: null\n", + " ethnicity:\n", + " code:\n", + " - ETHNICITY\n", + " - col(ethnicity)\n", + " time: null\n", + " hosp_admission:\n", + " code:\n", + " - HOSPITAL_ADMISSION\n", + " - col(hospitaladmitsource)\n", + " - col(hospitalregion)\n", + " - col(hospitalteachingstatus)\n", + " - col(hospitalnumbedscategory)\n", + " time: col(hospitaladmittimestamp)\n", + " hospital_id: hospitalid\n", + " hosp_discharge:\n", + " code:\n", + " - HOSPITAL_DISCHARGE\n", + " - col(hospitaldischargestatus)\n", + " - col(hospitaldischargelocation)\n", + " time: col(hospitaldischargetimestamp)\n", + " unit_admission:\n", + " code:\n", + " - UNIT_ADMISSION\n", + " - col(unitadmitsource)\n", + " - col(unitstaytype)\n", + " time: col(unitadmittimestamp)\n", + " ward_id: wardid\n", + " unit_stay_id: patientunitstayid\n", + " unit_admission_weight:\n", + " code:\n", + " - UNIT_ADMISSION_WEIGHT\n", + " time: col(unitadmittimestamp)\n", + " numeric_value: unitadmissionweight\n", + " unit_admission_height:\n", + " code:\n", + " - UNIT_ADMISSION_HEIGHT\n", + " time: col(unitadmittimestamp)\n", + " numeric_value: unitadmissionheight\n", + " unit_discharge:\n", + " code:\n", + " - UNIT_DISCHARGE\n", + " - col(unitdischargestatus)\n", + " - col(unitdischargelocation)\n", + " time: col(unitdischargetimestamp)\n", + " unit_discharge_weight:\n", + " code:\n", + " - UNIT_DISCHARGE_WEIGHT\n", + " time: col(unitdischargetimestamp)\n", + " numeric_value: unitdischargeweight\n", + "admissionDx:\n", + " admission_diagnosis:\n", + " code:\n", + " - ADMISSION_DX\n", + " - col(admitdxname)\n", + " time: col(admitDxEnteredTimestamp)\n", + " admission_dx_id: admissiondxid\n", + " unit_stay_id: patientunitstayid\n", + "allergy:\n", + " allergy:\n", + " code:\n", + " - ALLERGY\n", + " - col(allergytype)\n", + " - col(allergyname)\n", + " time: col(allergyEnteredTimestamp)\n", + "carePlanGeneral:\n", + " cplItem:\n", + " code:\n", + " - CAREPLAN_GENERAL\n", + " - col(cplgroup)\n", + " - col(cplitemvalue)\n", + " time: col(carePlanGeneralItemEnteredTimestamp)\n", + "carePlanEOL:\n", + " cplEolDiscussion:\n", + " code:\n", + " - CAREPLAN_EOL\n", + " time: col(carePlanEolDiscussionOccurredTimestamp)\n", + "carePlanGoal:\n", + " cplGoal:\n", + " code:\n", + " - CAREPLAN_GOAL\n", + " - col(cplgoalcategory)\n", + " - col(cplgoalvalue)\n", + " - col(cplgoalstatus)\n", + " time: col(carePlanGoalEnteredTimestamp)\n", + "carePlanInfectiousDisease:\n", + " cplInfectDisease:\n", + " code:\n", + " - CAREPLAN_INFECTIOUS_DISEASE\n", + " - col(infectdiseasesite)\n", + " - col(infectdiseaseassessment)\n", + " - col(treatment)\n", + " - col(responsetotherapy)\n", + " time: col(carePlanInfectDiseaseEnteredTimestamp)\n", + "diagnosis:\n", + " diagnosis:\n", + " code:\n", + " - ICD9CM\n", + " - col(icd9code)\n", + " - col(diagnosispriority)\n", + " time: col(diagnosisEnteredTimestamp)\n", + " diagnosis_string: diagnosisstring\n", + "lab:\n", + " lab:\n", + " code:\n", + " - LAB\n", + " - col(labmeasurenamesystem)\n", + " - col(labmeasurenameinterface)\n", + " - col(labname)\n", + " time: col(labResultDrawnTimestamp)\n", + " numeric_value: labresult\n", + " text_value: labresulttext\n", + " lab_type_id: labtypeid\n", + "medication:\n", + " drug_ordered:\n", + " code:\n", + " - MEDICATION\n", + " - ORDERED\n", + " - col(drugname)\n", + " time: col(drugordertimestamp)\n", + " medication_id: medicationid\n", + " drug_iv_admixture: drugivadmixture\n", + " dosage: dosage\n", + " route_admin: routeadmin\n", + " frequency: frequency\n", + " loading_dose: loadingdose\n", + " prn: prn\n", + " gtc: gtc\n", + " drug_started:\n", + " code:\n", + " - MEDICATION\n", + " - STARTED\n", + " - col(drugname)\n", + " time: col(drugstarttimestamp)\n", + " medication_id: medicationid\n", + " drug_stopped:\n", + " code:\n", + " - MEDICATION\n", + " - STOPPED\n", + " - col(drugname)\n", + " time: col(drugstoptimestamp)\n", + " medication_id: medicationid\n", + "nurseAssessment:\n", + " nurse_assessment_performed:\n", + " code:\n", + " - NURSE_ASSESSMENT\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nurseAssessPerformedTimestamp)\n", + " nurse_assessment_id: nurseassessid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + " nurse_assessment_entered:\n", + " code:\n", + " - NURSE_ASSESSMENT\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nurseAssessEnteredTimestamp)\n", + " nurse_assessment_id: nurseassessid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + "nurseCare:\n", + " nurse_care_performed:\n", + " code:\n", + " - NURSE_CARE\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nurseCarePerformedTimestamp)\n", + " nurse_care_id: nursecareid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + " nurse_care_entered:\n", + " code:\n", + " - NURSE_CARE\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nurseCareEnteredTimestamp)\n", + " nurse_care_id: nursecareid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + "nurseCharting:\n", + " nurse_charting_performed:\n", + " code:\n", + " - NURSE_CHARTING\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nursingChartPerformedTimestamp)\n", + " nurse_charting_id: nursingchartid\n", + " cell_type_cat: nursingchartcelltypecat\n", + " cell_type_val_name: nursingchartcelltypevalname\n", + " cell_type_val_label: nursingchartcelltypevallabel\n", + " cell_value: nursingchartvalue\n", + " nurse_charting_entered:\n", + " code:\n", + " - NURSE_CHARTING\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nursingChartEnteredTimestamp)\n", + " nurse_charting_id: nursingchartid\n", + " cell_type_cat: nursingchartcelltypecat\n", + " cell_type_val_name: nursingchartcelltypevalname\n", + " cell_type_val_label: nursingchartcelltypevallabel\n", + " cell_value: nursingchartvalue\n", + "pastHistory:\n", + " past_history_taken:\n", + " code:\n", + " - PAST_HISTORY\n", + " - TAKEN\n", + " - NOT YET DONE\n", + " time: col(pastHistoryTakenTimestamp)\n", + " past_history_id: pasthistoryid\n", + " note_type: pasthistorynotetype\n", + " path: pasthistorypath\n", + " value: pasthistoryvalue\n", + " value_text: pasthistoryvaluetext\n", + " past_history_entered:\n", + " code:\n", + " - PAST_HISTORY\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(pastHistoryEnteredTimestamp)\n", + " past_history_id: pasthistoryid\n", + " note_type: pasthistorynotetype\n", + " path: pasthistorypath\n", + " value: pasthistoryvalue\n", + " value_text: pasthistoryvaluetext\n", + "physicalExam:\n", + " physical_exam_entered:\n", + " code:\n", + " - PHYSICAL_EXAM\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(physicalExamEnteredTimestamp)\n", + " physical_exam_id: physicalexamid\n", + " text: physicalexamtext\n", + " path: physicalexampath\n", + " value: physicalexamvalue\n", + "respiratoryCare:\n", + " resp_care_status:\n", + " code:\n", + " - RESP_CARE\n", + " - STATUS\n", + " - NOT YET DONE\n", + " time: col(respCareStatusEnteredTimestamp)\n", + " resp_care_id: respcareid\n", + " airwaytype: airwaytype\n", + " airwaysize: airwaysize\n", + " airwayposition: airwayposition\n", + " cuffpressure: cuffpressure\n", + " lowexhmvlimit: lowexhmvlimit\n", + " hiexhmvlimit: hiexhmvlimit\n", + " lowexhtvlimit: lowexhtvlimit\n", + " hipeakpreslimit: hipeakpreslimit\n", + " lowpeakpreslimit: lowpeakpreslimit\n", + " hirespratelimit: hirespratelimit\n", + " lowrespratelimit: lowrespratelimit\n", + " sighpreslimit: sighpreslimit\n", + " lowironoxlimit: lowironoxlimit\n", + " highironoxlimit: highironoxlimit\n", + " meanairwaypreslimit: meanairwaypreslimit\n", + " peeplimit: peeplimit\n", + " cpaplimit: cpaplimit\n", + " setapneainterval: setapneainterval\n", + " setapneatv: setapneatv\n", + " setapneaippeephigh: setapneaippeephigh\n", + " setapnearr: setapnearr\n", + " setapneapeakflow: setapneapeakflow\n", + " setapneainsptime: setapneainsptime\n", + " setapneaie: setapneaie\n", + " setapneafio2: setapneafio2\n", + " vent_start:\n", + " code:\n", + " - VENT\n", + " - START\n", + " - NOT YET DONE\n", + " time: col(ventStartTimestamp)\n", + " resp_care_id: respcareid\n", + " vent_end:\n", + " code:\n", + " - VENT\n", + " - END\n", + " - NOT YET DONE\n", + " time: col(ventEndTimestamp)\n", + " resp_care_id: respcareid\n", + "respiratoryCharting:\n", + " resp_charting_performed:\n", + " code:\n", + " - RESP_CHARTING\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(respChartPerformedTimestamp)\n", + " resp_chart_id: respchartid\n", + " type_cat: respcharttypecat\n", + " value_label: respchartvaluelabel\n", + " value: respchartvalue\n", + " resp_charting_entered:\n", + " code:\n", + " - RESP_CHARTING\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(respChartEnteredTimestamp)\n", + " resp_chart_id: respchartid\n", + " type_cat: respcharttypecat\n", + " value_label: respchartvaluelabel\n", + " value: respchartvalue\n", + "treatment:\n", + " treatment:\n", + " code:\n", + " - TREATMENT\n", + " - ENTERED\n", + " - col(treatmentstring)\n", + " time: col(treatmentEnteredTimestamp)\n", + " treatment_id: treatmentid\n", + "vitalAperiodic:\n", + " non_invasive_systolic:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivesystolic\n", + " non_invasive_diastolic:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivediastolic\n", + " non_invasive_mean:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivemean\n", + " paop:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PAOP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: paop\n", + " cardiac_output:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - CARDIAC_OUTPUT\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: cardiacoutput\n", + " cardiac_input:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - CARDIAC_INPUT\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: cardiacinput\n", + " svr:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - SVR\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: svr\n", + " svri:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - SVRI\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: svri\n", + " pvr:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PVR\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: pvr\n", + " pvri:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PVRI\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: pvri\n", + "vitalPeriodic:\n", + " temperature:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - TEMPERATURE\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: temperature\n", + " saO2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - SAO2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: sao2\n", + " heartRate:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - HEARTRATE\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: heartrate\n", + " respiration:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - RESPIRATION\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: respiration\n", + " cvp:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - CVP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: cvp\n", + " etCo2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ETCO2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: etco2\n", + " systemic_systolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicsystolic\n", + " systemic_diastolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicdiastolic\n", + " systemic_mean:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicmean\n", + " pa_systolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: pasystolic\n", + " pa_diastolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: padiastolic\n", + " pa_mean:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: pamean\n", + " st1:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST1\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st1\n", + " st2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st2\n", + " st3:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST3\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st3\n", + " ICP:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ICP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: icp\n", + "\n", + "2024-12-14 17:20:15.041 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from patient files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:15.042 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from admissionDx files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from allergy files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanGeneral files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanEOL files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanGoal files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanInfectiousDisease files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from diagnosis files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from lab files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from medication files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseAssessment files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseCare files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseCharting files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from pastHistory files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from physicalExam files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from respiratoryCare files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from respiratoryCharting files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from treatment files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from vitalAperiodic files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:15.047 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from vitalPeriodic files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:15.047 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 20 dataframes\n", + "2024-12-14 17:20:15.108 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 2174 unique subject IDs of type int64\n", + "2024-12-14 17:20:15.112 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", + "2024-12-14 17:20:15.131 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 1087 subjects.\n", + "2024-12-14 17:20:15.131 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 544 subjects.\n", + "2024-12-14 17:20:15.131 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 543 subjects.\n", + "2024-12-14 17:20:15.132 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/.shards.json\n", + "2024-12-14 17:20:15.132 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:15.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:15.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:24.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:24.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:20:15.716 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", + "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 10000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: null\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: convert_to_sharded_events\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: eICU\n", + " dataset_version: 2.0\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the eICU dataset in longitudinal, sparse form\\\n", + " \\ from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export EICU_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 17:20:15.729 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "do_dedup_text_and_numeric: true\n", + "is_metadata: false\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "2024-12-14 17:20:15.730 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", + "2024-12-14 17:20:15.730 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml\n", + "2024-12-14 17:20:15.779 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", + "subject_id_col: patienthealthsystemstayid\n", + "patient:\n", + " dob:\n", + " code: MEDS_BIRTH\n", + " time: col(dateofbirth)\n", + " uniquepid: uniquepid\n", + " gender:\n", + " code:\n", + " - GENDER\n", + " - col(gender)\n", + " time: null\n", + " ethnicity:\n", + " code:\n", + " - ETHNICITY\n", + " - col(ethnicity)\n", + " time: null\n", + " hosp_admission:\n", + " code:\n", + " - HOSPITAL_ADMISSION\n", + " - col(hospitaladmitsource)\n", + " - col(hospitalregion)\n", + " - col(hospitalteachingstatus)\n", + " - col(hospitalnumbedscategory)\n", + " time: col(hospitaladmittimestamp)\n", + " hospital_id: hospitalid\n", + " hosp_discharge:\n", + " code:\n", + " - HOSPITAL_DISCHARGE\n", + " - col(hospitaldischargestatus)\n", + " - col(hospitaldischargelocation)\n", + " time: col(hospitaldischargetimestamp)\n", + " unit_admission:\n", + " code:\n", + " - UNIT_ADMISSION\n", + " - col(unitadmitsource)\n", + " - col(unitstaytype)\n", + " time: col(unitadmittimestamp)\n", + " ward_id: wardid\n", + " unit_stay_id: patientunitstayid\n", + " unit_admission_weight:\n", + " code:\n", + " - UNIT_ADMISSION_WEIGHT\n", + " time: col(unitadmittimestamp)\n", + " numeric_value: unitadmissionweight\n", + " unit_admission_height:\n", + " code:\n", + " - UNIT_ADMISSION_HEIGHT\n", + " time: col(unitadmittimestamp)\n", + " numeric_value: unitadmissionheight\n", + " unit_discharge:\n", + " code:\n", + " - UNIT_DISCHARGE\n", + " - col(unitdischargestatus)\n", + " - col(unitdischargelocation)\n", + " time: col(unitdischargetimestamp)\n", + " unit_discharge_weight:\n", + " code:\n", + " - UNIT_DISCHARGE_WEIGHT\n", + " time: col(unitdischargetimestamp)\n", + " numeric_value: unitdischargeweight\n", + "admissionDx:\n", + " admission_diagnosis:\n", + " code:\n", + " - ADMISSION_DX\n", + " - col(admitdxname)\n", + " time: col(admitDxEnteredTimestamp)\n", + " admission_dx_id: admissiondxid\n", + " unit_stay_id: patientunitstayid\n", + "allergy:\n", + " allergy:\n", + " code:\n", + " - ALLERGY\n", + " - col(allergytype)\n", + " - col(allergyname)\n", + " time: col(allergyEnteredTimestamp)\n", + "carePlanGeneral:\n", + " cplItem:\n", + " code:\n", + " - CAREPLAN_GENERAL\n", + " - col(cplgroup)\n", + " - col(cplitemvalue)\n", + " time: col(carePlanGeneralItemEnteredTimestamp)\n", + "carePlanEOL:\n", + " cplEolDiscussion:\n", + " code:\n", + " - CAREPLAN_EOL\n", + " time: col(carePlanEolDiscussionOccurredTimestamp)\n", + "carePlanGoal:\n", + " cplGoal:\n", + " code:\n", + " - CAREPLAN_GOAL\n", + " - col(cplgoalcategory)\n", + " - col(cplgoalvalue)\n", + " - col(cplgoalstatus)\n", + " time: col(carePlanGoalEnteredTimestamp)\n", + "carePlanInfectiousDisease:\n", + " cplInfectDisease:\n", + " code:\n", + " - CAREPLAN_INFECTIOUS_DISEASE\n", + " - col(infectdiseasesite)\n", + " - col(infectdiseaseassessment)\n", + " - col(treatment)\n", + " - col(responsetotherapy)\n", + " time: col(carePlanInfectDiseaseEnteredTimestamp)\n", + "diagnosis:\n", + " diagnosis:\n", + " code:\n", + " - ICD9CM\n", + " - col(icd9code)\n", + " - col(diagnosispriority)\n", + " time: col(diagnosisEnteredTimestamp)\n", + " diagnosis_string: diagnosisstring\n", + "lab:\n", + " lab:\n", + " code:\n", + " - LAB\n", + " - col(labmeasurenamesystem)\n", + " - col(labmeasurenameinterface)\n", + " - col(labname)\n", + " time: col(labResultDrawnTimestamp)\n", + " numeric_value: labresult\n", + " text_value: labresulttext\n", + " lab_type_id: labtypeid\n", + "medication:\n", + " drug_ordered:\n", + " code:\n", + " - MEDICATION\n", + " - ORDERED\n", + " - col(drugname)\n", + " time: col(drugordertimestamp)\n", + " medication_id: medicationid\n", + " drug_iv_admixture: drugivadmixture\n", + " dosage: dosage\n", + " route_admin: routeadmin\n", + " frequency: frequency\n", + " loading_dose: loadingdose\n", + " prn: prn\n", + " gtc: gtc\n", + " drug_started:\n", + " code:\n", + " - MEDICATION\n", + " - STARTED\n", + " - col(drugname)\n", + " time: col(drugstarttimestamp)\n", + " medication_id: medicationid\n", + " drug_stopped:\n", + " code:\n", + " - MEDICATION\n", + " - STOPPED\n", + " - col(drugname)\n", + " time: col(drugstoptimestamp)\n", + " medication_id: medicationid\n", + "nurseAssessment:\n", + " nurse_assessment_performed:\n", + " code:\n", + " - NURSE_ASSESSMENT\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nurseAssessPerformedTimestamp)\n", + " nurse_assessment_id: nurseassessid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + " nurse_assessment_entered:\n", + " code:\n", + " - NURSE_ASSESSMENT\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nurseAssessEnteredTimestamp)\n", + " nurse_assessment_id: nurseassessid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + "nurseCare:\n", + " nurse_care_performed:\n", + " code:\n", + " - NURSE_CARE\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nurseCarePerformedTimestamp)\n", + " nurse_care_id: nursecareid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + " nurse_care_entered:\n", + " code:\n", + " - NURSE_CARE\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nurseCareEnteredTimestamp)\n", + " nurse_care_id: nursecareid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + "nurseCharting:\n", + " nurse_charting_performed:\n", + " code:\n", + " - NURSE_CHARTING\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nursingChartPerformedTimestamp)\n", + " nurse_charting_id: nursingchartid\n", + " cell_type_cat: nursingchartcelltypecat\n", + " cell_type_val_name: nursingchartcelltypevalname\n", + " cell_type_val_label: nursingchartcelltypevallabel\n", + " cell_value: nursingchartvalue\n", + " nurse_charting_entered:\n", + " code:\n", + " - NURSE_CHARTING\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nursingChartEnteredTimestamp)\n", + " nurse_charting_id: nursingchartid\n", + " cell_type_cat: nursingchartcelltypecat\n", + " cell_type_val_name: nursingchartcelltypevalname\n", + " cell_type_val_label: nursingchartcelltypevallabel\n", + " cell_value: nursingchartvalue\n", + "pastHistory:\n", + " past_history_taken:\n", + " code:\n", + " - PAST_HISTORY\n", + " - TAKEN\n", + " - NOT YET DONE\n", + " time: col(pastHistoryTakenTimestamp)\n", + " past_history_id: pasthistoryid\n", + " note_type: pasthistorynotetype\n", + " path: pasthistorypath\n", + " value: pasthistoryvalue\n", + " value_text: pasthistoryvaluetext\n", + " past_history_entered:\n", + " code:\n", + " - PAST_HISTORY\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(pastHistoryEnteredTimestamp)\n", + " past_history_id: pasthistoryid\n", + " note_type: pasthistorynotetype\n", + " path: pasthistorypath\n", + " value: pasthistoryvalue\n", + " value_text: pasthistoryvaluetext\n", + "physicalExam:\n", + " physical_exam_entered:\n", + " code:\n", + " - PHYSICAL_EXAM\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(physicalExamEnteredTimestamp)\n", + " physical_exam_id: physicalexamid\n", + " text: physicalexamtext\n", + " path: physicalexampath\n", + " value: physicalexamvalue\n", + "respiratoryCare:\n", + " resp_care_status:\n", + " code:\n", + " - RESP_CARE\n", + " - STATUS\n", + " - NOT YET DONE\n", + " time: col(respCareStatusEnteredTimestamp)\n", + " resp_care_id: respcareid\n", + " airwaytype: airwaytype\n", + " airwaysize: airwaysize\n", + " airwayposition: airwayposition\n", + " cuffpressure: cuffpressure\n", + " lowexhmvlimit: lowexhmvlimit\n", + " hiexhmvlimit: hiexhmvlimit\n", + " lowexhtvlimit: lowexhtvlimit\n", + " hipeakpreslimit: hipeakpreslimit\n", + " lowpeakpreslimit: lowpeakpreslimit\n", + " hirespratelimit: hirespratelimit\n", + " lowrespratelimit: lowrespratelimit\n", + " sighpreslimit: sighpreslimit\n", + " lowironoxlimit: lowironoxlimit\n", + " highironoxlimit: highironoxlimit\n", + " meanairwaypreslimit: meanairwaypreslimit\n", + " peeplimit: peeplimit\n", + " cpaplimit: cpaplimit\n", + " setapneainterval: setapneainterval\n", + " setapneatv: setapneatv\n", + " setapneaippeephigh: setapneaippeephigh\n", + " setapnearr: setapnearr\n", + " setapneapeakflow: setapneapeakflow\n", + " setapneainsptime: setapneainsptime\n", + " setapneaie: setapneaie\n", + " setapneafio2: setapneafio2\n", + " vent_start:\n", + " code:\n", + " - VENT\n", + " - START\n", + " - NOT YET DONE\n", + " time: col(ventStartTimestamp)\n", + " resp_care_id: respcareid\n", + " vent_end:\n", + " code:\n", + " - VENT\n", + " - END\n", + " - NOT YET DONE\n", + " time: col(ventEndTimestamp)\n", + " resp_care_id: respcareid\n", + "respiratoryCharting:\n", + " resp_charting_performed:\n", + " code:\n", + " - RESP_CHARTING\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(respChartPerformedTimestamp)\n", + " resp_chart_id: respchartid\n", + " type_cat: respcharttypecat\n", + " value_label: respchartvaluelabel\n", + " value: respchartvalue\n", + " resp_charting_entered:\n", + " code:\n", + " - RESP_CHARTING\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(respChartEnteredTimestamp)\n", + " resp_chart_id: respchartid\n", + " type_cat: respcharttypecat\n", + " value_label: respchartvaluelabel\n", + " value: respchartvalue\n", + "treatment:\n", + " treatment:\n", + " code:\n", + " - TREATMENT\n", + " - ENTERED\n", + " - col(treatmentstring)\n", + " time: col(treatmentEnteredTimestamp)\n", + " treatment_id: treatmentid\n", + "vitalAperiodic:\n", + " non_invasive_systolic:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivesystolic\n", + " non_invasive_diastolic:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivediastolic\n", + " non_invasive_mean:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivemean\n", + " paop:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PAOP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: paop\n", + " cardiac_output:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - CARDIAC_OUTPUT\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: cardiacoutput\n", + " cardiac_input:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - CARDIAC_INPUT\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: cardiacinput\n", + " svr:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - SVR\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: svr\n", + " svri:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - SVRI\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: svri\n", + " pvr:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PVR\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: pvr\n", + " pvri:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PVRI\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: pvri\n", + "vitalPeriodic:\n", + " temperature:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - TEMPERATURE\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: temperature\n", + " saO2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - SAO2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: sao2\n", + " heartRate:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - HEARTRATE\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: heartrate\n", + " respiration:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - RESPIRATION\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: respiration\n", + " cvp:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - CVP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: cvp\n", + " etCo2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ETCO2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: etco2\n", + " systemic_systolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicsystolic\n", + " systemic_diastolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicdiastolic\n", + " systemic_mean:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicmean\n", + " pa_systolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: pasystolic\n", + " pa_diastolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: padiastolic\n", + " pa_mean:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: pamean\n", + " st1:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST1\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st1\n", + " st2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st2\n", + " st3:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST3\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st3\n", + " ICP:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ICP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: icp\n", + "\n", + "2024-12-14 17:20:15.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.794 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:15.794543. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:15.795 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.796 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:802: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " typed_subjects = pl.Series(subjects, dtype=df.schema[input_subject_id_column])\n", + "2024-12-14 17:20:15.802 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.806 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", + "2024-12-14 17:20:15.807 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:513: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " if v not in df.schema:\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:517: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " is_numeric = df.schema[v].is_numeric()\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:518: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " is_str = df.schema[v] == pl.Utf8\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:519: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " is_cat = isinstance(df.schema[v], pl.Categorical)\n", + "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", + "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.810 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.857 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.063106\n", + "2024-12-14 17:20:15.857 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:15.794543.json\n", + "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:15.858597. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:15.859 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", + "2024-12-14 17:20:15.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:15.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007113\n", + "2024-12-14 17:20:15.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:15.858597.json\n", + "2024-12-14 17:20:15.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:15.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:15.866664. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:15.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:15.867 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:15.867 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", + "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", + "2024-12-14 17:20:15.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:15.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.872 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:16.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.680040\n", + "2024-12-14 17:20:16.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:15.866664.json\n", + "2024-12-14 17:20:16.548 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:16.549 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.549008. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.549 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:16.549 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.550 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:16.555 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", + "2024-12-14 17:20:16.555 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:16.862 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.313909\n", + "2024-12-14 17:20:16.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:16.549008.json\n", + "2024-12-14 17:20:16.863 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:16.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.864327. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:16.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:16.868 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", + "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_entered\n", + "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.870 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:16.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.048560\n", + "2024-12-14 17:20:16.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:16.864327.json\n", + "2024-12-14 17:20:16.913 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.914149. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", + "2024-12-14 17:20:16.918 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:468: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " if col not in df.schema:\n", + "2024-12-14 17:20:16.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:16.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:16.976 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.062129\n", + "2024-12-14 17:20:16.976 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:16.914149.json\n", + "2024-12-14 17:20:16.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:16.977 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.977324. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.977 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:16.977 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.978 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:16.981 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:16.989 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012023\n", + "2024-12-14 17:20:16.989 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:16.977324.json\n", + "2024-12-14 17:20:16.989 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.990072. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:16.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007124\n", + "2024-12-14 17:20:16.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:16.990072.json\n", + "2024-12-14 17:20:16.997 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.998026. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:17.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006326\n", + "2024-12-14 17:20:17.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:16.998026.json\n", + "2024-12-14 17:20:17.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.005067. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", + "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:17.032 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.026926\n", + "2024-12-14 17:20:17.032 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:17.005067.json\n", + "2024-12-14 17:20:17.032 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.032910. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", + "2024-12-14 17:20:17.037 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", + "2024-12-14 17:20:17.037 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", + "2024-12-14 17:20:17.037 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.038 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.038 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:17.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013724\n", + "2024-12-14 17:20:17.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:17.032910.json\n", + "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.047613. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.048 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:17.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", + "2024-12-14 17:20:17.052 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.052 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.052 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", + "2024-12-14 17:20:17.053 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.053 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:17.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015651\n", + "2024-12-14 17:20:17.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:17.047613.json\n", + "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.064617. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", + "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", + "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", + "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.069 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:17.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010798\n", + "2024-12-14 17:20:17.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:17.064617.json\n", + "2024-12-14 17:20:17.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.076091. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:17.079 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalstatus\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:17.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007029\n", + "2024-12-14 17:20:17.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:17.076091.json\n", + "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.084405. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:17.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", + "2024-12-14 17:20:17.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", + "2024-12-14 17:20:17.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventEndTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018818\n", + "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:17.084405.json\n", + "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.103882. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.104 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:17.104 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:17.107 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", + "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", + "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:17.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007481\n", + "2024-12-14 17:20:17.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:17.103882.json\n", + "2024-12-14 17:20:17.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.112132. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:529: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " case \"text_value\" if not df.schema[v] == pl.Utf8:\n", + "2024-12-14 17:20:17.117 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.117 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:17.185 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.073375\n", + "2024-12-14 17:20:17.185 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:17.112132.json\n", + "2024-12-14 17:20:17.186 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:17.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.187269. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:17.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.188 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", + "2024-12-14 17:20:17.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:20:17.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", + "2024-12-14 17:20:17.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:17.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024675\n", + "2024-12-14 17:20:17.212 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:17.187269.json\n", + "2024-12-14 17:20:17.212 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:17.212 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.212843. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:17.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.213 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", + "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", + "2024-12-14 17:20:17.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.218 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:17.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.071501\n", + "2024-12-14 17:20:17.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:17.212843.json\n", + "2024-12-14 17:20:17.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.288028. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:17.294 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", + "2024-12-14 17:20:17.294 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", + "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", + "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", + "2024-12-14 17:20:17.303 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.303 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.303 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:20.250 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:02.962939\n", + "2024-12-14 17:20:20.251 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:17.288028.json\n", + "2024-12-14 17:20:20.251 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:20.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.252404. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:20.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.253 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", + "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", + "2024-12-14 17:20:20.258 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.258 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.258 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:20.273 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.021446\n", + "2024-12-14 17:20:20.273 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:20.252404.json\n", + "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.274563. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.275 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", + "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:20.280 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006231\n", + "2024-12-14 17:20:20.280 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:20.274563.json\n", + "2024-12-14 17:20:20.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:20.281 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.281752. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.282 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:20.282 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:20.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", + "2024-12-14 17:20:20.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", + "2024-12-14 17:20:20.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:20.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.341012\n", + "2024-12-14 17:20:20.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:20.281752.json\n", + "2024-12-14 17:20:20.624 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.625045. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:20.630 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", + "2024-12-14 17:20:20.630 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", + "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", + "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.636 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:20.756 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.131885\n", + "2024-12-14 17:20:20.757 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:20.625045.json\n", + "2024-12-14 17:20:20.757 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.758208. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:20.763 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", + "2024-12-14 17:20:20.763 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_entered\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:20.785 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.026920\n", + "2024-12-14 17:20:20.785 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:20.758208.json\n", + "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.786460. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.787 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", + "2024-12-14 17:20:20.791 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", + "2024-12-14 17:20:20.791 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:20.791 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", + "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:20.825 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.038700\n", + "2024-12-14 17:20:20.825 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:20.786460.json\n", + "2024-12-14 17:20:20.825 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.826051. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:20.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008767\n", + "2024-12-14 17:20:20.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:20.826051.json\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.835439. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", + "2024-12-14 17:20:20.849 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", + "2024-12-14 17:20:20.849 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", + "2024-12-14 17:20:20.849 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", + "2024-12-14 17:20:20.850 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.850 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.850 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:20.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019692\n", + "2024-12-14 17:20:20.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:20.835439.json\n", + "2024-12-14 17:20:20.858 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:20.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.859824. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:20.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:20.869 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009974\n", + "2024-12-14 17:20:20.869 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:20.859824.json\n", + "2024-12-14 17:20:20.870 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:20.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.870785. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.871 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:20.871 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:20.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", + "2024-12-14 17:20:20.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.876 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:20.886 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016078\n", + "2024-12-14 17:20:20.886 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:20.870785.json\n", + "2024-12-14 17:20:20.887 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:20.887 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.887774. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.888 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:20.888 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.888 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:20.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010328\n", + "2024-12-14 17:20:20.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:20.887774.json\n", + "2024-12-14 17:20:20.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.899071. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:20.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", + "2024-12-14 17:20:20.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", + "2024-12-14 17:20:20.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.905 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.905 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:20.911 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012724\n", + "2024-12-14 17:20:20.911 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:20.899071.json\n", + "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.912592. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.913 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:20.916 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:20.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008875\n", + "2024-12-14 17:20:20.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:20.912592.json\n", + "2024-12-14 17:20:20.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.922179. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:20.926 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalstatus\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:20.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007137\n", + "2024-12-14 17:20:20.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:20.922179.json\n", + "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.930594. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.931 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:20.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", + "2024-12-14 17:20:20.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.938 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.938 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", + "2024-12-14 17:20:20.938 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventEndTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:20.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018362\n", + "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:20.930594.json\n", + "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.949702. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:20.950 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.950 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:20.953 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", + "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", + "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:20.957 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007359\n", + "2024-12-14 17:20:20.957 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:20.949702.json\n", + "2024-12-14 17:20:20.957 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.958027. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.963 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:20.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.041938\n", + "2024-12-14 17:20:21.000 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:20.958027.json\n", + "2024-12-14 17:20:21.001 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:21.001855. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", + "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", + "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:21.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025725\n", + "2024-12-14 17:20:21.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:21.001855.json\n", + "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:21.028566. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:21.029 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:21.032 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", + "2024-12-14 17:20:21.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", + "2024-12-14 17:20:21.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.034 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:21.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.046489\n", + "2024-12-14 17:20:21.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:21.028566.json\n", + "2024-12-14 17:20:21.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:21.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:21.078339. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:21.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:21.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:21.079 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:21.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", + "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", + "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", + "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", + "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:22.505 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.427590\n", + "2024-12-14 17:20:22.506 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:21.078339.json\n", + "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.507490. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", + "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", + "2024-12-14 17:20:22.513 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.513 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.513 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:22.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015754\n", + "2024-12-14 17:20:22.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:22.507490.json\n", + "2024-12-14 17:20:22.523 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.524001. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", + "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:22.529 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005803\n", + "2024-12-14 17:20:22.529 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:22.524001.json\n", + "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.530690. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.531 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:22.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", + "2024-12-14 17:20:22.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", + "2024-12-14 17:20:22.536 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.536 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:22.829 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.299273\n", + "2024-12-14 17:20:22.830 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:22.530690.json\n", + "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.832372. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", + "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", + "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:22.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.128224\n", + "2024-12-14 17:20:22.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:22.832372.json\n", + "2024-12-14 17:20:22.961 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.962000. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:22.966 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", + "2024-12-14 17:20:22.966 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_entered\n", + "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.968 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:22.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028308\n", + "2024-12-14 17:20:22.991 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:22.962000.json\n", + "2024-12-14 17:20:22.995 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.996131. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.998 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", + "2024-12-14 17:20:23.005 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", + "2024-12-14 17:20:23.006 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:23.006 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", + "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:23.039 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.043148\n", + "2024-12-14 17:20:23.039 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:22.996131.json\n", + "2024-12-14 17:20:23.039 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.040126. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:23.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008941\n", + "2024-12-14 17:20:23.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:23.040126.json\n", + "2024-12-14 17:20:23.049 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.049948. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:23.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007014\n", + "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:23.049948.json\n", + "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.057704. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:23.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.058 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:23.061 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006415\n", + "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:23.057704.json\n", + "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.064773. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.065 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:23.065 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:23.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", + "2024-12-14 17:20:23.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.069 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016276\n", + "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:23.064773.json\n", + "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.081778. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:23.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.082 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", + "2024-12-14 17:20:23.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", + "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", + "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:23.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010117\n", + "2024-12-14 17:20:23.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:23.081778.json\n", + "2024-12-14 17:20:23.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:23.092 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.092753. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.092 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:23.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:23.096 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", + "2024-12-14 17:20:23.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", + "2024-12-14 17:20:23.098 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.098 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:23.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012818\n", + "2024-12-14 17:20:23.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:23.092753.json\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.106417. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:23.115 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008873\n", + "2024-12-14 17:20:23.115 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:23.106417.json\n", + "2024-12-14 17:20:23.115 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.116227. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalstatus\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:23.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007319\n", + "2024-12-14 17:20:23.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:23.116227.json\n", + "2024-12-14 17:20:23.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.124966. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:23.130 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", + "2024-12-14 17:20:23.130 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", + "2024-12-14 17:20:23.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventEndTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:23.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018378\n", + "2024-12-14 17:20:23.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:23.124966.json\n", + "2024-12-14 17:20:23.143 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.144139. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:23.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", + "2024-12-14 17:20:23.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", + "2024-12-14 17:20:23.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.149 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:23.151 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007778\n", + "2024-12-14 17:20:23.151 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:23.144139.json\n", + "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.152636. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.153 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", + "2024-12-14 17:20:23.156 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:23.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.040408\n", + "2024-12-14 17:20:23.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:23.152636.json\n", + "2024-12-14 17:20:23.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.195057. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", + "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", + "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:23.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024942\n", + "2024-12-14 17:20:23.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:23.195057.json\n", + "2024-12-14 17:20:23.220 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:23.221 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.221246. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.221 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:23.221 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.222 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:23.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", + "2024-12-14 17:20:23.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", + "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:23.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.047219\n", + "2024-12-14 17:20:23.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:23.221246.json\n", + "2024-12-14 17:20:23.271 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.271971. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:23.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", + "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", + "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", + "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", + "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", + "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", + "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", + "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", + "2024-12-14 17:20:23.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:24.698 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.426391\n", + "2024-12-14 17:20:24.698 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:23.271971.json\n", + "2024-12-14 17:20:24.698 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:24.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:24.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:21.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:18:08.184 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", + "\u001b[32m2024-12-14 17:21:21.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:20:25.493 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -4097,7 +7176,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:18:08.199 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:20:25.508 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "unique_by: null\n", "additional_sort_by: null\n", "is_metadata: false\n", @@ -4110,13 +7189,39 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:18:08.214 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 16:18:08.222 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 16:18:08.224 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:302 - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet is in progress as of 2024-12-14 16:17:18.058267. Returning.\n", - "2024-12-14 16:18:08.224 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:18:08.224 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:08.224594. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:18:08.225 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0\n", - "2024-12-14 16:18:08.226 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + "2024-12-14 17:20:25.523 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 17:20:25.531 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:20:25.532 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:25.531974. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:25.533 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0\n", + "2024-12-14 17:20:25.535 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:25.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:25.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:20:39.416 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:13.884657\n", + "2024-12-14 17:20:39.417 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T17:20:25.531974.json\n", + "2024-12-14 17:20:39.418 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:20:39.419 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:39.419421. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:39.419 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0\n", + "2024-12-14 17:20:39.421 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", @@ -4137,14 +7242,14 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 16:18:08.236 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:18:08.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:18:20.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:11.843473\n", - "2024-12-14 16:18:20.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T16:18:08.224594.json\n", - "2024-12-14 16:18:20.069 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:18:20.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:20.070463. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:18:20.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0\n", - "2024-12-14 16:18:20.072 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + "2024-12-14 17:20:39.432 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:39.433 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:20:49.043 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:09.624267\n", + "2024-12-14 17:20:49.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T17:20:39.419421.json\n", + "2024-12-14 17:20:49.045 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:20:49.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:49.045956. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:49.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0\n", + "2024-12-14 17:20:49.047 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", @@ -4165,18 +7270,18 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 16:18:20.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:18:20.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:18:52.696 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:32.625813\n", - "2024-12-14 16:18:52.698 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T16:18:20.070463.json\n", - "2024-12-14 16:18:52.698 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:44.499135\n", + "2024-12-14 17:20:49.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:49.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:21:20.691 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:31.645753\n", + "2024-12-14 17:21:20.692 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T17:20:49.045956.json\n", + "2024-12-14 17:21:20.693 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:55.184901\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:21:21.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:21.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:18:53.861 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", + "\u001b[32m2024-12-14 17:21:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:21:21.837 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -4244,7 +7349,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:18:53.877 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:21:21.853 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "do_retype: true\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", @@ -4257,23 +7362,23 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/finalize_MEDS_metadata\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:18:53.881 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", - "2024-12-14 16:18:53.881 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:179 - No code metadata found at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects/codes.parquet. Making empty metadata file.\n", - "2024-12-14 16:18:53.883 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/codes.parquet\n", - "2024-12-14 16:18:53.885 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", - "2024-12-14 16:18:53.887 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/dataset.json\n", - "2024-12-14 16:18:53.887 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", - "2024-12-14 16:18:53.888 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 1087 subjects\n", - "2024-12-14 16:18:53.888 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 544 subjects\n", - "2024-12-14 16:18:53.888 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 543 subjects\n", - "2024-12-14 16:18:53.889 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/subject_splits.parquet\n", + "2024-12-14 17:21:21.857 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", + "2024-12-14 17:21:21.857 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:179 - No code metadata found at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects/codes.parquet. Making empty metadata file.\n", + "2024-12-14 17:21:21.859 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/codes.parquet\n", + "2024-12-14 17:21:21.861 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", + "2024-12-14 17:21:21.863 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/dataset.json\n", + "2024-12-14 17:21:21.864 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", + "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 1087 subjects\n", + "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 544 subjects\n", + "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 543 subjects\n", + "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/subject_splits.parquet\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 16:19:11.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:21:21.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:21.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:45.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:19:11.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:18:54.479 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", + "\u001b[32m2024-12-14 17:21:45.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:21:22.492 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -4341,7 +7446,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:18:54.495 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:21:22.508 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_retype: true\n", "is_metadata: false\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", @@ -4353,22 +7458,29 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data\n", " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", - "2024-12-14 16:18:54.511 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 2 shards\n", - "2024-12-14 16:18:54.519 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 16:18:54.520 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:54.520021. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:18:54.521 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:18:54.522 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:18:56.160 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 16:18:59.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:04.605576\n", - "2024-12-14 16:18:59.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T16:18:54.520021.json\n", - "2024-12-14 16:18:59.331 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", - "2024-12-14 16:18:59.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:59.331903. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:18:59.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:18:59.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:19:03.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", - "2024-12-14 16:19:10.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:11.208637\n", - "2024-12-14 16:19:10.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T16:18:59.331903.json\n", - "2024-12-14 16:19:11.376 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:16.881477\n", + "2024-12-14 17:21:22.524 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 17:21:22.532 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 17:21:22.533 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:21:22.533510. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:21:22.535 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:21:22.535 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:21:24.217 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 17:21:27.278 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:04.745218\n", + "2024-12-14 17:21:27.278 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T17:21:22.533510.json\n", + "2024-12-14 17:21:27.495 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 17:21:27.495 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:21:27.495681. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:21:27.496 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:21:27.496 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:21:28.862 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 17:21:31.908 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:04.412390\n", + "2024-12-14 17:21:31.908 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T17:21:27.495681.json\n", + "2024-12-14 17:21:32.132 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", + "2024-12-14 17:21:32.133 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:21:32.133050. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:21:32.133 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:21:32.133 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:21:36.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", + "2024-12-14 17:21:43.984 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:11.851649\n", + "2024-12-14 17:21:43.984 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T17:21:32.133050.json\n", + "2024-12-14 17:21:44.856 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:22.347847\n", "\u001b[0m\n" ] } @@ -4388,7 +7500,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 50, "metadata": {}, "outputs": [ { @@ -4401,20 +7513,20 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (25_221_384, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
129391null"GENDER//Female"null
129391null"ETHNICITY//Caucasian"null
1293911929-06-29 23:44:00"MEDS_BIRTH"null
1293911929-07-01 13:14:00"MEDS_BIRTH"null
1293912015-12-29 20:34:00"NURSE_CHARTING//PERFORMED//NOT…null
27362412015-12-31 13:00:00"NURSE_CHARTING//ENTERED//NOT Y…null
27362412015-12-31 13:00:00"RESP_CARE//STATUS//NOT YET DON…null
27362412015-12-31 19:29:00"HOSPITAL_DISCHARGE//Alive//Hom…null
27362412015-12-31 23:43:00"MEDICATION//STOPPED//MORPHINE …null
27362412016-01-01 13:25:00"MEDICATION//STOPPED//KETOROLAC…null
" + "shape: (33_392_344, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
131736null"GENDER//Female"null
131736null"ETHNICITY//Caucasian"null
1317361982-07-02 02:07:00"MEDS_BIRTH"null
1317362015-12-30 22:52:00"LAB//mg/dL//mg/dL//glucose"99.0
1317362015-12-30 22:52:00"LAB//mg/dL//mg/dL//calcium"8.8
27362412015-12-31 13:00:00"NURSE_CHARTING//ENTERED//NOT Y…null
27362412015-12-31 13:00:00"RESP_CARE//STATUS//NOT YET DON…null
27362412015-12-31 19:29:00"HOSPITAL_DISCHARGE//Alive//Hom…null
27362412015-12-31 23:43:00"MEDICATION//STOPPED//MORPHINE …null
27362412016-01-01 13:25:00"MEDICATION//STOPPED//KETOROLAC…null
" ], "text/plain": [ - "shape: (25_221_384, 4)\n", + "shape: (33_392_344, 4)\n", "┌────────────┬─────────────────────┬─────────────────────────────────┬───────────────┐\n", "│ subject_id ┆ time ┆ code ┆ numeric_value │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ datetime[μs] ┆ str ┆ f32 │\n", "╞════════════╪═════════════════════╪═════════════════════════════════╪═══════════════╡\n", - "│ 129391 ┆ null ┆ GENDER//Female ┆ null │\n", - "│ 129391 ┆ null ┆ ETHNICITY//Caucasian ┆ null │\n", - "│ 129391 ┆ 1929-06-29 23:44:00 ┆ MEDS_BIRTH ┆ null │\n", - "│ 129391 ┆ 1929-07-01 13:14:00 ┆ MEDS_BIRTH ┆ null │\n", - "│ 129391 ┆ 2015-12-29 20:34:00 ┆ NURSE_CHARTING//PERFORMED//NOT… ┆ null │\n", + "│ 131736 ┆ null ┆ GENDER//Female ┆ null │\n", + "│ 131736 ┆ null ┆ ETHNICITY//Caucasian ┆ null │\n", + "│ 131736 ┆ 1982-07-02 02:07:00 ┆ MEDS_BIRTH ┆ null │\n", + "│ 131736 ┆ 2015-12-30 22:52:00 ┆ LAB//mg/dL//mg/dL//glucose ┆ 99.0 │\n", + "│ 131736 ┆ 2015-12-30 22:52:00 ┆ LAB//mg/dL//mg/dL//calcium ┆ 8.8 │\n", "│ … ┆ … ┆ … ┆ … │\n", "│ 2736241 ┆ 2015-12-31 13:00:00 ┆ NURSE_CHARTING//ENTERED//NOT Y… ┆ null │\n", "│ 2736241 ┆ 2015-12-31 13:00:00 ┆ RESP_CARE//STATUS//NOT YET DON… ┆ null │\n", @@ -4424,7 +7536,7 @@ "└────────────┴─────────────────────┴─────────────────────────────────┴───────────────┘" ] }, - "execution_count": 35, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" }